diff variant_effect_predictor/Bio/Tools/RestrictionEnzyme.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Tools/RestrictionEnzyme.pm	Thu Apr 11 06:29:17 2013 -0400
@@ -0,0 +1,1247 @@
+#------------------------------------------------------------------
+# $Id: RestrictionEnzyme.pm,v 1.25.2.1 2003/06/29 00:53:20 jason Exp $
+#
+# BioPerl module Bio::Tools::RestrictionEnzyme
+#
+# Cared for by Steve Chervitz <sac@bioperl.org>
+#
+# You may distribute this module under the same terms as perl itself
+#------------------------------------------------------------------
+
+## POD Documentation:
+
+=head1 NAME
+
+Bio::Tools::RestrictionEnzyme - Bioperl object for a restriction endonuclease
+(cuts DNA at specific locations)
+
+=head1 SYNOPSIS
+
+=head2 Object Creation
+
+    require Bio::Tools::RestrictionEnzyme;
+
+    ## Create a new object by name.
+
+    $re1 = new Bio::Tools::RestrictionEnzyme(-NAME =>'EcoRI');
+
+    ## Create a new object using special syntax
+    ## which specifies the enzyme name, recognition site, and cut position.
+    ## Used for enzymes not known to this module.
+
+    $re2 = new Bio::Tools::RestrictionEnzyme(-NAME =>'EcoRV--GAT^ATC', 
+				  	     -MAKE =>'custom');
+
+    ## Get a list of the resulting fragments when a sequence is cut with
+    ## the given enzyme. The method expects a Bio::Seq object.
+
+    @fragments = $re2->cut_seq($seqobj);
+
+    ## Get a list of names of all available restriction enzymes 
+    ## known to this module.
+
+    @all = $re->available_list();
+
+    ## Get the names of restriction enzymes that have 6 bp 
+    ## recognition sequences.
+
+    @sixcutters = $re->available_list(6);
+
+
+=head1 INSTALLATION
+
+This module is included with the central Bioperl distribution:
+
+   http://bio.perl.org/Core/Latest
+   ftp://bio.perl.org/pub/DIST
+
+Follow the installation instructions included in the README file.
+
+=head1 DESCRIPTION
+
+The Bio::Tools::RestrictionEnzyme.pm module encapsulates generic data and 
+methods for using restriction endonucleases for in silico restriction
+analysis of DNA sequences.
+
+=head2 Considerations
+
+This module is a precursor for a more full featured version that may do such
+things as download data from online databases such as REBase http://www.neb.com/rebase/.
+Thus, there is currently no functionality for obtaining data about commercial
+availability for a restriction enzyme.
+
+At some point in the future, it may make sense to derive RestrictionEnzymes
+from a class such as Bio::Enzyme or Bio::Prot::Protein (neither of which now
+exist) so that more data about the enzyme and related information can be 
+easily obtained.
+
+This module is currently in use at 
+
+ http://genome-www.stanford.edu/Sacch3D/analysis/
+
+
+=head2 Digesting on Runs of N
+
+To digest a sequence on runs of N's in the sequence. Here's what you can do:
+
+    $re_n  = new Bio::Tools::RestrictionEnzyme(-name=>'N--NNNNN',
+                                               -make=>'custom');
+
+Specify the number of N's you want to match in the -name parameter. 
+So the above example will recognize and cut at runs of 5 Ns.
+ If you wanted to cut at runs of 10 N's, you would use 
+
+     -name => 'N--NNNNNNNNNN'
+
+Note that you must use a specific number of N's, you cannot use a regexp to
+digest at N+ for example, because the actual number of N's at each site are
+not recorded when the sequence is analyzed. So cut_locations( ) wouldn't be 
+correct. 
+
+=head1 EXAMPLES
+
+See the script examples/restriction.pl in the Bioperl distribution.
+
+=head1 DEPENDENCIES 
+
+Bio::Tools::RestrictionEnzyme.pm is a concrete class that inherits from 
+B<Bio::Root::Root> and uses by delegation B<Bio::PrimarySeq>.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists 
+
+User feedback is an integral part of the evolution of this and other Bioperl
+modules. Send your comments and suggestions preferably to one of the Bioperl
+mailing lists. Your participation is much appreciated.
+
+   bioperl-l@bioperl.org             - General discussion
+   http://bioperl.org/MailList.shtml - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track the bugs
+and their resolution. Bug reports can be submitted via email or the web:
+
+    bioperl-bugs@bio.perl.org
+    http://bugzilla.bioperl.org/
+
+=head1 AUTHOR
+
+Steve Chervitz, E<lt>sac@bioperl.orgE<gt>
+
+=head1 COPYRIGHT
+
+Copyright (c) 1997-2002 Steve A. Chervitz. All Rights Reserved.
+This module is free software; you can redistribute it and/or 
+modify it under the same terms as Perl itself.
+
+=head1 SEE ALSO
+
+  Bio::Root::Root    - Base class.
+  Bio::PrimarySeq    - Lightweight sequence object.
+
+  http://bio.perl.org/  - Bioperl Project Homepage
+
+=cut
+
+#
+##
+###
+#### END of main POD documentation.
+###
+##
+#'
+
+
+=head1 APPENDIX
+
+Methods beginning with a leading underscore are considered private
+and are intended for internal use by this module. They are
+B<not> considered part of the public interface and are described here
+for documentation purposes only.
+
+=cut
+
+
+package Bio::Tools::RestrictionEnzyme;
+use strict;
+
+use Bio::Root::Root;
+use Exporter;
+
+use vars qw (@ISA @EXPORT_OK %EXPORT_TAGS $ID $version @RE_available $Revision);
+
+@ISA         = qw(Bio::Root::Root Exporter);
+@EXPORT_OK   = qw(@RE_available);
+%EXPORT_TAGS = ( std => [qw(@RE_available)] );
+
+$ID = 'Bio::Tools::RestrictionEnzyme';
+$version = 0.04;
+$Revision = '$Id: RestrictionEnzyme.pm,v 1.25.2.1 2003/06/29 00:53:20 jason Exp $';  #'
+
+# Generated from REBASE version 208 (strider format), dated Aug 1 2002
+# using scripts/contributed/rebase2list.pl
+# Syntax: RE-name => 'SITE CUTS-AT' where SITE and CUTS-AT are separated 
+# by a space.
+
+my %RE = (
+ 'AasI'	=> 'GACNNNNNNGTC 7',
+ 'AatI'	=> 'AGGCCT 3',
+ 'AatII'	=> 'GACGTC 5',
+ 'AauI'	=> 'TGTACA 1',
+ 'AccI'	=> 'GTMKAC 2',
+ 'AccII'	=> 'CGCG 2',
+ 'AccIII'	=> 'TCCGGA 1',
+ 'Acc16I'	=> 'TGCGCA 3',
+ 'Acc65I'	=> 'GGTACC 1',
+ 'Acc113I'	=> 'AGTACT 3',
+ 'AccB1I'	=> 'GGYRCC 1',
+ 'AccB7I'	=> 'CCANNNNNTGG 7',
+ 'AclI'	=> 'AACGTT 2',
+ 'AcsI'	=> 'RAATTY 1',
+ 'AcvI'	=> 'CACGTG 3',
+ 'AcyI'	=> 'GRCGYC 2',
+ 'AdeI'	=> 'CACNNNGTG 6',
+ 'AfaI'	=> 'GTAC 2',
+ 'AfeI'	=> 'AGCGCT 3',
+ 'AflI'	=> 'GGWCC 1',
+ 'AflII'	=> 'CTTAAG 1',
+ 'AflIII'	=> 'ACRYGT 1',
+ 'AgeI'	=> 'ACCGGT 1',
+ 'AhaIII'	=> 'TTTAAA 3',
+ 'AhdI'	=> 'GACNNNNNGTC 6',
+ 'AhlI'	=> 'ACTAGT 1',
+ 'AleI'	=> 'CACNNNNGTG 5',
+ 'AluI'	=> 'AGCT 2',
+ 'Alw21I'	=> 'GWGCWC 5',
+ 'Alw44I'	=> 'GTGCAC 1',
+ 'AlwNI'	=> 'CAGNNNCTG 6',
+ 'Ama87I'	=> 'CYCGRG 1',
+ 'AocI'	=> 'CCTNAGG 2',
+ 'Aor51HI'	=> 'AGCGCT 3',
+ 'ApaI'	=> 'GGGCCC 5',
+ 'ApaBI'	=> 'GCANNNNNTGC 8',
+ 'ApaLI'	=> 'GTGCAC 1',
+ 'ApoI'	=> 'RAATTY 1',
+ 'AscI'	=> 'GGCGCGCC 2',
+ 'AseI'	=> 'ATTAAT 2',
+ 'AsiAI'	=> 'ACCGGT 1',
+ 'AsiSI'	=> 'GCGATCGC 5',
+ 'AsnI'	=> 'ATTAAT 2',
+ 'AspI'	=> 'GACNNNGTC 4',
+ 'Asp700I'	=> 'GAANNNNTTC 5',
+ 'Asp718I'	=> 'GGTACC 1',
+ 'AspEI'	=> 'GACNNNNNGTC 6',
+ 'AspHI'	=> 'GWGCWC 5',
+ 'AspLEI'	=> 'GCGC 3',
+ 'AspS9I'	=> 'GGNCC 1',
+ 'AsuI'	=> 'GGNCC 1',
+ 'AsuII'	=> 'TTCGAA 2',
+ 'AsuC2I'	=> 'CCSGG 2',
+ 'AsuNHI'	=> 'GCTAGC 1',
+ 'AvaI'	=> 'CYCGRG 1',
+ 'AvaII'	=> 'GGWCC 1',
+ 'AviII'	=> 'TGCGCA 3',
+ 'AvrII'	=> 'CCTAGG 1',
+ 'AxyI'	=> 'CCTNAGG 2',
+ 'BalI'	=> 'TGGCCA 3',
+ 'BamHI'	=> 'GGATCC 1',
+ 'BanI'	=> 'GGYRCC 1',
+ 'BanII'	=> 'GRGCYC 5',
+ 'BanIII'	=> 'ATCGAT 2',
+ 'BbeI'	=> 'GGCGCC 5',
+ 'BbrPI'	=> 'CACGTG 3',
+ 'BbuI'	=> 'GCATGC 5',
+ 'Bbv12I'	=> 'GWGCWC 5',
+ 'BclI'	=> 'TGATCA 1',
+ 'BcnI'	=> 'CCSGG 2',
+ 'BcoI'	=> 'CYCGRG 1',
+ 'BcuI'	=> 'ACTAGT 1',
+ 'BetI'	=> 'WCCGGW 1',
+ 'BfaI'	=> 'CTAG 1',
+ 'BfmI'	=> 'CTRYAG 1',
+ 'BfrI'	=> 'CTTAAG 1',
+ 'BfrBI'	=> 'ATGCAT 3',
+ 'BfuCI'	=> 'GATC 0',
+ 'BglI'	=> 'GCCNNNNNGGC 7',
+ 'BglII'	=> 'AGATCT 1',
+ 'BlnI'	=> 'CCTAGG 1',
+ 'BloHII'	=> 'CTGCAG 5',
+ 'BlpI'	=> 'GCTNAGC 2',
+ 'Bme18I'	=> 'GGWCC 1',
+ 'Bme1390I'	=> 'CCNGG 2',
+ 'Bme1580I'	=> 'GKGCMC 5',
+ 'BmtI'	=> 'GCTAGC 5',
+ 'BmyI'	=> 'GDGCHC 5',
+ 'BoxI'	=> 'GACNNNNGTC 5',
+ 'Bpu14I'	=> 'TTCGAA 2',
+ 'Bpu1102I'	=> 'GCTNAGC 2',
+ 'Bsa29I'	=> 'ATCGAT 2',
+ 'BsaAI'	=> 'YACGTR 3',
+ 'BsaBI'	=> 'GATNNNNATC 5',
+ 'BsaHI'	=> 'GRCGYC 2',
+ 'BsaJI'	=> 'CCNNGG 1',
+ 'BsaOI'	=> 'CGRYCG 4',
+ 'BsaWI'	=> 'WCCGGW 1',
+ 'BscI'	=> 'ATCGAT 2',
+ 'Bsc4I'	=> 'CCNNNNNNNGG 7',
+ 'BscBI'	=> 'GGNNCC 3',
+ 'BscFI'	=> 'GATC 0',
+ 'Bse8I'	=> 'GATNNNNATC 5',
+ 'Bse21I'	=> 'CCTNAGG 2',
+ 'Bse118I'	=> 'RCCGGY 1',
+ 'BseAI'	=> 'TCCGGA 1',
+ 'BseBI'	=> 'CCWGG 2',
+ 'BseCI'	=> 'ATCGAT 2',
+ 'BseDI'	=> 'CCNNGG 1',
+ 'BseJI'	=> 'GATNNNNATC 5',
+ 'BseLI'	=> 'CCNNNNNNNGG 7',
+ 'BsePI'	=> 'GCGCGC 1',
+ 'BseSI'	=> 'GKGCMC 5',
+ 'BseX3I'	=> 'CGGCCG 1',
+ 'BshI'	=> 'GGCC 2',
+ 'Bsh1236I'	=> 'CGCG 2',
+ 'Bsh1285I'	=> 'CGRYCG 4',
+ 'BshFI'	=> 'GGCC 2',
+ 'BshNI'	=> 'GGYRCC 1',
+ 'BshTI'	=> 'ACCGGT 1',
+ 'BsiBI'	=> 'GATNNNNATC 5',
+ 'BsiCI'	=> 'TTCGAA 2',
+ 'BsiEI'	=> 'CGRYCG 4',
+ 'BsiHKAI'	=> 'GWGCWC 5',
+ 'BsiHKCI'	=> 'CYCGRG 1',
+ 'BsiLI'	=> 'CCWGG 2',
+ 'BsiMI'	=> 'TCCGGA 1',
+ 'BsiQI'	=> 'TGATCA 1',
+ 'BsiSI'	=> 'CCGG 1',
+ 'BsiWI'	=> 'CGTACG 1',
+ 'BsiXI'	=> 'ATCGAT 2',
+ 'BsiYI'	=> 'CCNNNNNNNGG 7',
+ 'BsiZI'	=> 'GGNCC 1',
+ 'BslI'	=> 'CCNNNNNNNGG 7',
+ 'BsoBI'	=> 'CYCGRG 1',
+ 'Bsp13I'	=> 'TCCGGA 1',
+ 'Bsp19I'	=> 'CCATGG 1',
+ 'Bsp68I'	=> 'TCGCGA 3',
+ 'Bsp106I'	=> 'ATCGAT 2',
+ 'Bsp119I'	=> 'TTCGAA 2',
+ 'Bsp120I'	=> 'GGGCCC 1',
+ 'Bsp143I'	=> 'GATC 0',
+ 'Bsp143II'	=> 'RGCGCY 5',
+ 'Bsp1286I'	=> 'GDGCHC 5',
+ 'Bsp1407I'	=> 'TGTACA 1',
+ 'Bsp1720I'	=> 'GCTNAGC 2',
+ 'BspA2I'	=> 'CCTAGG 1',
+ 'BspCI'	=> 'CGATCG 4',
+ 'BspDI'	=> 'ATCGAT 2',
+ 'BspEI'	=> 'TCCGGA 1',
+ 'BspHI'	=> 'TCATGA 1',
+ 'BspLI'	=> 'GGNNCC 3',
+ 'BspLU11I'	=> 'ACATGT 1',
+ 'BspMII'	=> 'TCCGGA 1',
+ 'BspTI'	=> 'CTTAAG 1',
+ 'BspT104I'	=> 'TTCGAA 2',
+ 'BspT107I'	=> 'GGYRCC 1',
+ 'BspXI'	=> 'ATCGAT 2',
+ 'BsrBRI'	=> 'GATNNNNATC 5',
+ 'BsrFI'	=> 'RCCGGY 1',
+ 'BsrGI'	=> 'TGTACA 1',
+ 'BssAI'	=> 'RCCGGY 1',
+ 'BssECI'	=> 'CCNNGG 1',
+ 'BssHI'	=> 'CTCGAG 1',
+ 'BssHII'	=> 'GCGCGC 1',
+ 'BssKI'	=> 'CCNGG 0',
+ 'BssNAI'	=> 'GTATAC 3',
+ 'BssT1I'	=> 'CCWWGG 1',
+ 'Bst98I'	=> 'CTTAAG 1',
+ 'Bst1107I'	=> 'GTATAC 3',
+ 'BstACI'	=> 'GRCGYC 2',
+ 'BstAPI'	=> 'GCANNNNNTGC 7',
+ 'BstBI'	=> 'TTCGAA 2',
+ 'BstBAI'	=> 'YACGTR 3',
+ 'Bst4CI'	=> 'ACNGT 3',
+ 'BstC8I'	=> 'GCNNGC 3',
+ 'BstDEI'	=> 'CTNAG 1',
+ 'BstDSI'	=> 'CCRYGG 1',
+ 'BstEII'	=> 'GGTNACC 1',
+ 'BstENI'	=> 'CCTNNNNNAGG 5',
+ 'BstENII'	=> 'GATC 0',
+ 'BstFNI'	=> 'CGCG 2',
+ 'BstH2I'	=> 'RGCGCY 5',
+ 'BstHHI'	=> 'GCGC 3',
+ 'BstHPI'	=> 'GTTAAC 3',
+ 'BstKTI'	=> 'GATC 3',
+ 'BstMAI'	=> 'CTGCAG 5',
+ 'BstMCI'	=> 'CGRYCG 4',
+ 'BstMWI'	=> 'GCNNNNNNNGC 7',
+ 'BstNI'	=> 'CCWGG 2',
+ 'BstNSI'	=> 'RCATGY 5',
+ 'BstOI'	=> 'CCWGG 2',
+ 'BstPI'	=> 'GGTNACC 1',
+ 'BstPAI'	=> 'GACNNNNGTC 5',
+ 'BstSCI'	=> 'CCNGG 0',
+ 'BstSFI'	=> 'CTRYAG 1',
+ 'BstSNI'	=> 'TACGTA 3',
+ 'BstUI'	=> 'CGCG 2',
+ 'Bst2UI'	=> 'CCWGG 2',
+ 'BstXI'	=> 'CCANNNNNNTGG 8',
+ 'BstX2I'	=> 'RGATCY 1',
+ 'BstYI'	=> 'RGATCY 1',
+ 'BstZI'	=> 'CGGCCG 1',
+ 'BstZ17I'	=> 'GTATAC 3',
+ 'Bsu15I'	=> 'ATCGAT 2',
+ 'Bsu36I'	=> 'CCTNAGG 2',
+ 'BsuRI'	=> 'GGCC 2',
+ 'BsuTUI'	=> 'ATCGAT 2',
+ 'BtgI'	=> 'CCRYGG 1',
+ 'BthCI'	=> 'GCNGC 4',
+ 'Cac8I'	=> 'GCNNGC 3',
+ 'CaiI'	=> 'CAGNNNCTG 6',
+ 'CauII'	=> 'CCSGG 2',
+ 'CciNI'	=> 'GCGGCCGC 2',
+ 'CelII'	=> 'GCTNAGC 2',
+ 'CfoI'	=> 'GCGC 3',
+ 'CfrI'	=> 'YGGCCR 1',
+ 'Cfr9I'	=> 'CCCGGG 1',
+ 'Cfr10I'	=> 'RCCGGY 1',
+ 'Cfr13I'	=> 'GGNCC 1',
+ 'Cfr42I'	=> 'CCGCGG 4',
+ 'ChaI'	=> 'GATC 4',
+ 'ClaI'	=> 'ATCGAT 2',
+ 'CpoI'	=> 'CGGWCCG 2',
+ 'CspI'	=> 'CGGWCCG 2',
+ 'Csp6I'	=> 'GTAC 1',
+ 'Csp45I'	=> 'TTCGAA 2',
+ 'CspAI'	=> 'ACCGGT 1',
+ 'CviAII'	=> 'CATG 1',
+ 'CviJI'	=> 'RGCY 2',
+ 'CviRI'	=> 'TGCA 2',
+ 'CviTI'	=> 'RGCY 2',
+ 'CvnI'	=> 'CCTNAGG 2',
+ 'DdeI'	=> 'CTNAG 1',
+ 'DpnI'	=> 'GATC 2',
+ 'DpnII'	=> 'GATC 0',
+ 'DraI'	=> 'TTTAAA 3',
+ 'DraII'	=> 'RGGNCCY 2',
+ 'DraIII'	=> 'CACNNNGTG 6',
+ 'DrdI'	=> 'GACNNNNNNGTC 7',
+ 'DsaI'	=> 'CCRYGG 1',
+ 'DseDI'	=> 'GACNNNNNNGTC 7',
+ 'EaeI'	=> 'YGGCCR 1',
+ 'EagI'	=> 'CGGCCG 1',
+ 'Eam1105I'	=> 'GACNNNNNGTC 6',
+ 'Ecl136II'	=> 'GAGCTC 3',
+ 'EclHKI'	=> 'GACNNNNNGTC 6',
+ 'EclXI'	=> 'CGGCCG 1',
+ 'Eco24I'	=> 'GRGCYC 5',
+ 'Eco32I'	=> 'GATATC 3',
+ 'Eco47I'	=> 'GGWCC 1',
+ 'Eco47III'	=> 'AGCGCT 3',
+ 'Eco52I'	=> 'CGGCCG 1',
+ 'Eco72I'	=> 'CACGTG 3',
+ 'Eco81I'	=> 'CCTNAGG 2',
+ 'Eco88I'	=> 'CYCGRG 1',
+ 'Eco91I'	=> 'GGTNACC 1',
+ 'Eco105I'	=> 'TACGTA 3',
+ 'Eco130I'	=> 'CCWWGG 1',
+ 'Eco147I'	=> 'AGGCCT 3',
+ 'EcoHI'	=> 'CCSGG 0',
+ 'EcoICRI'	=> 'GAGCTC 3',
+ 'EcoNI'	=> 'CCTNNNNNAGG 5',
+ 'EcoO65I'	=> 'GGTNACC 1',
+ 'EcoO109I'	=> 'RGGNCCY 2',
+ 'EcoRI'	=> 'GAATTC 1',
+ 'EcoRII'	=> 'CCWGG 0',
+ 'EcoRV'	=> 'GATATC 3',
+ 'EcoT14I'	=> 'CCWWGG 1',
+ 'EcoT22I'	=> 'ATGCAT 5',
+ 'EcoT38I'	=> 'GRGCYC 5',
+ 'EgeI'	=> 'GGCGCC 3',
+ 'EheI'	=> 'GGCGCC 3',
+ 'ErhI'	=> 'CCWWGG 1',
+ 'EsaBC3I'	=> 'TCGA 2',
+ 'EspI'	=> 'GCTNAGC 2',
+ 'FatI'	=> 'CATG 0',
+ 'FauNDI'	=> 'CATATG 2',
+ 'FbaI'	=> 'TGATCA 1',
+ 'FblI'	=> 'GTMKAC 2',
+ 'FmuI'	=> 'GGNCC 4',
+ 'FnuDII'	=> 'CGCG 2',
+ 'Fnu4HI'	=> 'GCNGC 2',
+ 'FriOI'	=> 'GRGCYC 5',
+ 'FseI'	=> 'GGCCGGCC 6',
+ 'FspI'	=> 'TGCGCA 3',
+ 'FspAI'	=> 'RTGCGCAY 4',
+ 'Fsp4HI'	=> 'GCNGC 2',
+ 'FunI'	=> 'AGCGCT 3',
+ 'FunII'	=> 'GAATTC 1',
+ 'HaeI'	=> 'WGGCCW 3',
+ 'HaeII'	=> 'RGCGCY 5',
+ 'HaeIII'	=> 'GGCC 2',
+ 'HapII'	=> 'CCGG 1',
+ 'HgiAI'	=> 'GWGCWC 5',
+ 'HgiCI'	=> 'GGYRCC 1',
+ 'HgiJII'	=> 'GRGCYC 5',
+ 'HhaI'	=> 'GCGC 3',
+ 'Hin1I'	=> 'GRCGYC 2',
+ 'Hin6I'	=> 'GCGC 1',
+ 'HinP1I'	=> 'GCGC 1',
+ 'HincII'	=> 'GTYRAC 3',
+ 'HindII'	=> 'GTYRAC 3',
+ 'HindIII'	=> 'AAGCTT 1',
+ 'HinfI'	=> 'GANTC 1',
+ 'HpaI'	=> 'GTTAAC 3',
+ 'HpaII'	=> 'CCGG 1',
+ 'Hpy8I'	=> 'GTNNAC 3',
+ 'Hpy99I'	=> 'CGWCG 5',
+ 'Hpy178III'	=> 'TCNNGA 2',
+ 'Hpy188I'	=> 'TCNGA 3',
+ 'Hpy188III'	=> 'TCNNGA 2',
+ 'HpyCH4I'	=> 'CATG 3',
+ 'HpyCH4III'	=> 'ACNGT 3',
+ 'HpyCH4IV'	=> 'ACGT 1',
+ 'HpyCH4V'	=> 'TGCA 2',
+ 'HpyF10VI'	=> 'GCNNNNNNNGC 8',
+ 'Hsp92I'	=> 'GRCGYC 2',
+ 'Hsp92II'	=> 'CATG 4',
+ 'HspAI'	=> 'GCGC 1',
+ 'ItaI'	=> 'GCNGC 2',
+ 'KasI'	=> 'GGCGCC 1',
+ 'KpnI'	=> 'GGTACC 5',
+ 'Kpn2I'	=> 'TCCGGA 1',
+ 'KspI'	=> 'CCGCGG 4',
+ 'Ksp22I'	=> 'TGATCA 1',
+ 'KspAI'	=> 'GTTAAC 3',
+ 'Kzo9I'	=> 'GATC 0',
+ 'LpnI'	=> 'RGCGCY 3',
+ 'LspI'	=> 'TTCGAA 2',
+ 'MabI'	=> 'ACCWGGT 1',
+ 'MaeI'	=> 'CTAG 1',
+ 'MaeII'	=> 'ACGT 1',
+ 'MaeIII'	=> 'GTNAC 0',
+ 'MamI'	=> 'GATNNNNATC 5',
+ 'MboI'	=> 'GATC 0',
+ 'McrI'	=> 'CGRYCG 4',
+ 'MfeI'	=> 'CAATTG 1',
+ 'MflI'	=> 'RGATCY 1',
+ 'MhlI'	=> 'GDGCHC 5',
+ 'MlsI'	=> 'TGGCCA 3',
+ 'MluI'	=> 'ACGCGT 1',
+ 'MluNI'	=> 'TGGCCA 3',
+ 'Mly113I'	=> 'GGCGCC 2',
+ 'Mph1103I'	=> 'ATGCAT 5',
+ 'MroI'	=> 'TCCGGA 1',
+ 'MroNI'	=> 'GCCGGC 1',
+ 'MroXI'	=> 'GAANNNNTTC 5',
+ 'MscI'	=> 'TGGCCA 3',
+ 'MseI'	=> 'TTAA 1',
+ 'MslI'	=> 'CAYNNNNRTG 5',
+ 'MspI'	=> 'CCGG 1',
+ 'Msp20I'	=> 'TGGCCA 3',
+ 'MspA1I'	=> 'CMGCKG 3',
+ 'MspCI'	=> 'CTTAAG 1',
+ 'MspR9I'	=> 'CCNGG 2',
+ 'MssI'	=> 'GTTTAAAC 4',
+ 'MstI'	=> 'TGCGCA 3',
+ 'MunI'	=> 'CAATTG 1',
+ 'MvaI'	=> 'CCWGG 2',
+ 'MvnI'	=> 'CGCG 2',
+ 'MwoI'	=> 'GCNNNNNNNGC 7',
+ 'NaeI'	=> 'GCCGGC 3',
+ 'NarI'	=> 'GGCGCC 2',
+ 'NciI'	=> 'CCSGG 2',
+ 'NcoI'	=> 'CCATGG 1',
+ 'NdeI'	=> 'CATATG 2',
+ 'NdeII'	=> 'GATC 0',
+ 'NgoAIV'	=> 'GCCGGC 1',
+ 'NgoMIV'	=> 'GCCGGC 1',
+ 'NheI'	=> 'GCTAGC 1',
+ 'NlaIII'	=> 'CATG 4',
+ 'NlaIV'	=> 'GGNNCC 3',
+ 'Nli3877I'	=> 'CYCGRG 5',
+ 'NmuCI'	=> 'GTSAC 0',
+ 'NotI'	=> 'GCGGCCGC 2',
+ 'NruI'	=> 'TCGCGA 3',
+ 'NruGI'	=> 'GACNNNNNGTC 6',
+ 'NsbI'	=> 'TGCGCA 3',
+ 'NsiI'	=> 'ATGCAT 5',
+ 'NspI'	=> 'RCATGY 5',
+ 'NspIII'	=> 'CYCGRG 1',
+ 'NspV'	=> 'TTCGAA 2',
+ 'NspBII'	=> 'CMGCKG 3',
+ 'OliI'	=> 'CACNNNNGTG 5',
+ 'PacI'	=> 'TTAATTAA 5',
+ 'PaeI'	=> 'GCATGC 5',
+ 'PaeR7I'	=> 'CTCGAG 1',
+ 'PagI'	=> 'TCATGA 1',
+ 'PalI'	=> 'GGCC 2',
+ 'PauI'	=> 'GCGCGC 1',
+ 'PceI'	=> 'AGGCCT 3',
+ 'PciI'	=> 'ACATGT 1',
+ 'PdiI'	=> 'GCCGGC 3',
+ 'PdmI'	=> 'GAANNNNTTC 5',
+ 'Pfl23II'	=> 'CGTACG 1',
+ 'PflBI'	=> 'CCANNNNNTGG 7',
+ 'PflFI'	=> 'GACNNNGTC 4',
+ 'PflMI'	=> 'CCANNNNNTGG 7',
+ 'PfoI'	=> 'TCCNGGA 1',
+ 'PinAI'	=> 'ACCGGT 1',
+ 'Ple19I'	=> 'CGATCG 4',
+ 'PmaCI'	=> 'CACGTG 3',
+ 'PmeI'	=> 'GTTTAAAC 4',
+ 'PmlI'	=> 'CACGTG 3',
+ 'Ppu10I'	=> 'ATGCAT 1',
+ 'PpuMI'	=> 'RGGWCCY 2',
+ 'PpuXI'	=> 'RGGWCCY 2',
+ 'PshAI'	=> 'GACNNNNGTC 5',
+ 'PshBI'	=> 'ATTAAT 2',
+ 'PsiI'	=> 'TTATAA 3',
+ 'Psp03I'	=> 'GGWCC 4',
+ 'Psp5II'	=> 'RGGWCCY 2',
+ 'Psp6I'	=> 'CCWGG 0',
+ 'Psp1406I'	=> 'AACGTT 2',
+ 'PspAI'	=> 'CCCGGG 1',
+ 'Psp124BI'	=> 'GAGCTC 5',
+ 'PspEI'	=> 'GGTNACC 1',
+ 'PspGI'	=> 'CCWGG 0',
+ 'PspLI'	=> 'CGTACG 1',
+ 'PspN4I'	=> 'GGNNCC 3',
+ 'PspOMI'	=> 'GGGCCC 1',
+ 'PspPI'	=> 'GGNCC 1',
+ 'PspPPI'	=> 'RGGWCCY 2',
+ 'PssI'	=> 'RGGNCCY 5',
+ 'PstI'	=> 'CTGCAG 5',
+ 'PsuI'	=> 'RGATCY 1',
+ 'PsyI'	=> 'GACNNNGTC 4',
+ 'PvuI'	=> 'CGATCG 4',
+ 'PvuII'	=> 'CAGCTG 3',
+ 'RcaI'	=> 'TCATGA 1',
+ 'RsaI'	=> 'GTAC 2',
+ 'RsrII'	=> 'CGGWCCG 2',
+ 'Rsr2I'	=> 'CGGWCCG 2',
+ 'SacI'	=> 'GAGCTC 5',
+ 'SacII'	=> 'CCGCGG 4',
+ 'SalI'	=> 'GTCGAC 1',
+ 'SanDI'	=> 'GGGWCCC 2',
+ 'SatI'	=> 'GCNGC 2',
+ 'SauI'	=> 'CCTNAGG 2',
+ 'Sau96I'	=> 'GGNCC 1',
+ 'Sau3AI'	=> 'GATC 0',
+ 'SbfI'	=> 'CCTGCAGG 6',
+ 'ScaI'	=> 'AGTACT 3',
+ 'SciI'	=> 'CTCGAG 3',
+ 'ScrFI'	=> 'CCNGG 2',
+ 'SdaI'	=> 'CCTGCAGG 6',
+ 'SduI'	=> 'GDGCHC 5',
+ 'SecI'	=> 'CCNNGG 1',
+ 'SelI'	=> 'CGCG 0',
+ 'SexAI'	=> 'ACCWGGT 1',
+ 'SfcI'	=> 'CTRYAG 1',
+ 'SfeI'	=> 'CTRYAG 1',
+ 'SfiI'	=> 'GGCCNNNNNGGCC 8',
+ 'SfoI'	=> 'GGCGCC 3',
+ 'Sfr274I'	=> 'CTCGAG 1',
+ 'Sfr303I'	=> 'CCGCGG 4',
+ 'SfuI'	=> 'TTCGAA 2',
+ 'SgfI'	=> 'GCGATCGC 5',
+ 'SgrAI'	=> 'CRCCGGYG 2',
+ 'SgrBI'	=> 'CCGCGG 4',
+ 'SinI'	=> 'GGWCC 1',
+ 'SlaI'	=> 'CTCGAG 1',
+ 'SmaI'	=> 'CCCGGG 3',
+ 'SmiI'	=> 'ATTTAAAT 4',
+ 'SmiMI'	=> 'CAYNNNNRTG 5',
+ 'SmlI'	=> 'CTYRAG 1',
+ 'SnaBI'	=> 'TACGTA 3',
+ 'SpaHI'	=> 'GCATGC 5',
+ 'SpeI'	=> 'ACTAGT 1',
+ 'SphI'	=> 'GCATGC 5',
+ 'SplI'	=> 'CGTACG 1',
+ 'SrfI'	=> 'GCCCGGGC 4',
+ 'Sse9I'	=> 'AATT 0',
+ 'Sse232I'	=> 'CGCCGGCG 2',
+ 'Sse8387I'	=> 'CCTGCAGG 6',
+ 'Sse8647I'	=> 'AGGWCCT 2',
+ 'SseBI'	=> 'AGGCCT 3',
+ 'SspI'	=> 'AATATT 3',
+ 'SspBI'	=> 'TGTACA 1',
+ 'SstI'	=> 'GAGCTC 5',
+ 'SstII'	=> 'CCGCGG 4',
+ 'StuI'	=> 'AGGCCT 3',
+ 'StyI'	=> 'CCWWGG 1',
+ 'SunI'	=> 'CGTACG 1',
+ 'SwaI'	=> 'ATTTAAAT 4',
+ 'TaaI'	=> 'ACNGT 3',
+ 'TaiI'	=> 'ACGT 4',
+ 'TaqI'	=> 'TCGA 1',
+ 'TasI'	=> 'AATT 0',
+ 'TatI'	=> 'WGTACW 1',
+ 'TauI'	=> 'GCSGC 4',
+ 'TelI'	=> 'GACNNNGTC 4',
+ 'TfiI'	=> 'GAWTC 1',
+ 'ThaI'	=> 'CGCG 2',
+ 'TliI'	=> 'CTCGAG 1',
+ 'Tru1I'	=> 'TTAA 1',
+ 'Tru9I'	=> 'TTAA 1',
+ 'TscI'	=> 'ACGT 4',
+ 'TseI'	=> 'GCWGC 1',
+ 'Tsp45I'	=> 'GTSAC 0',
+ 'Tsp509I'	=> 'AATT 0',
+ 'Tsp4CI'	=> 'ACNGT 3',
+ 'TspEI'	=> 'AATT 0',
+ 'Tth111I'	=> 'GACNNNGTC 4',
+ 'TthHB8I'	=> 'TCGA 1',
+ 'UnbI'	=> 'GGNCC 0',
+ 'Van91I'	=> 'CCANNNNNTGG 7',
+ 'Vha464I'	=> 'CTTAAG 1',
+ 'VneI'	=> 'GTGCAC 1',
+ 'VpaK11AI'	=> 'GGWCC 0',
+ 'VpaK11BI'	=> 'GGWCC 1',
+ 'VspI'	=> 'ATTAAT 2',
+ 'XagI'	=> 'CCTNNNNNAGG 5',
+ 'XapI'	=> 'RAATTY 1',
+ 'XbaI'	=> 'TCTAGA 1',
+ 'XceI'	=> 'RCATGY 5',
+ 'XcmI'	=> 'CCANNNNNNNNNTGG 8',
+ 'XhoI'	=> 'CTCGAG 1',
+ 'XhoII'	=> 'RGATCY 1',
+ 'XmaI'	=> 'CCCGGG 1',
+ 'XmaIII'	=> 'CGGCCG 1',
+ 'XmaCI'	=> 'CCCGGG 1',
+ 'XmaJI'	=> 'CCTAGG 1',
+ 'XmiI'	=> 'GTMKAC 2',
+ 'XmnI'	=> 'GAANNNNTTC 5',
+ 'XspI'	=> 'CTAG 1',
+ 'ZhoI'	=> 'ATCGAT 2',
+ 'ZraI'	=> 'GACGTC 3',
+ 'Zsp2I'	=> 'ATGCAT 5',
+);
+
+@RE_available = sort keys %RE;
+
+
+=head1 new
+
+ Title     : new
+ Purpose   : Initializes the RestrictionEnzyme object and calls
+           : superclass constructor last (Bio:Seq.pm).
+ Returns   : n/a
+ Argument  : Parameters passed to new()
+ Comments  : A RestrictionEnzyme object manages its recognition sequence
+           : as a Bio::PrimarySeq object.
+
+See Also   : L<_make_custom>(), L<_make_standard>(), B<Bio::PrimarySeq.pm::_initialize()>
+
+=cut
+
+#---------------
+sub new {
+#---------------
+    my($class, @args) = @_;
+
+    my $self = $class->SUPER::new(@args);
+    my ($name,$make) = $self->_rearrange([qw(NAME MAKE)],@args);
+    $name && $self->name($name);
+    my %data;
+    if(defined $make && $make eq 'custom') {
+	%data = $self->_make_custom($name); 
+    } else {
+	%data = $self->_make_standard($name);
+    }
+    $self->{'_seq'} = new Bio::PrimarySeq(%data, 
+				   -VERBOSE =>$self->verbose,
+ 				   -alphabet => 'dna',
+				   );
+    return $self;
+}
+
+
+#=head1 _make_standard
+#
+# Title     : _make_standard
+# Usage     : n/a; automatically called by _initialize()
+# Purpose   : Permits custom RE object construction from name.
+#	    : 'EcoRI'.
+# Returns   : Hash containing named parameters for Bio::PrimarySeq.pm constructor.
+# Argument  : String containing string with special syntax.
+# Throws    : Exception if the requested enzyme name is unavailable.
+#	    : NOTE: Case sensitive.
+#
+#See Also   : L<Bio::PrimarySeq::_initialize()|Bio::PrimarySeq>, L<_make_custom()|_make_custom>
+#
+#=cut
+
+#------------------
+sub _make_standard {
+#------------------
+    my($self, $name) = @_;
+
+    $name =~ s/^\s+|\s+$//g;
+ 
+    $self->is_available($name) || 
+	$self->throw("Unavailable or undefined enzyme: $name (Note: CASE SENSITIVE)\n" .
+		     "Currently available enzymes: \n@RE_available\n");
+
+    my @data = split( ' ', $RE{$name});
+    my (%dat);
+    $dat{-SEQ} = $data[0];
+    $dat{-NAME} = $dat{-ID}= $name;    
+    $self->{'_cuts_after'} = $data[1];
+
+    return %dat;
+}
+
+
+#=head1 _make_custom
+#
+# Title     : _make_custom
+# Usage     : n/a; automatically called by _initialize()
+# Purpose   : Permits custom RE object construction from strings 
+#	    : such as 'EcoRI--G^AATTC' as the name of the enzyme.
+# Returns   : Hash containing named parameters for Bio::PrimarySeq.pm constructor.
+# Argument  : String containing string with special syntax.
+# Throws    : Exception if the string has bad syntax.
+#	    : Warning if the string did not specify cut position.
+#	    :         Places cut site after 5'-most position.
+#
+#See Also   : L<Bio::PrimarySeq::_initialize()|Bio::PrimarySeq>
+#
+#=cut
+
+#'
+#-----------------
+sub _make_custom {
+#-----------------
+    my($self, $name) = @_;
+
+    $name =~ s/\s+//g;
+    my @parts  = split '--', $name;
+    my (%dat);
+    $dat{-NAME} = $dat{-ID} = $parts[0];
+    $self->name($parts[0]);  ## Reset name
+
+    $parts[1] || return $self->throw("Undefined recognition site for $parts[0].",
+				      "Use this syntax: EcoRV--GAT^ATC");
+    ## Determine the cuts_after point.
+    my $cut_index = index $parts[1], '^';
+    if( $cut_index <0) { $cut_index = 0;
+			 $self->warn("Unknown cut position for $parts[0]. Assuming position 0\n" . 
+				     "Use carat to specify cut position (e.g., G^AATTC)"); }
+    $self->{'_cuts_after'} =  $cut_index;
+
+    ## Save the recognition sequence after removing the '^'
+    $parts[1] =~ s/\^//g;
+    $dat{-SEQ} = $parts[1];
+    return %dat;
+}
+    
+
+=head1 cuts_after
+
+ Title     : cuts_after
+ Usage     : $num = $re->cuts_after();
+ Purpose   : Sets/Gets an integer indicating the position of cleavage 
+           : relative to the 5' end of the recognition sequence.
+ Returns   : Integer
+ Argument  : Integer (optional)
+ Throws    : Exception if argument is non-numeric.
+ Access    : Public
+ Comments  : This method is only needed to change the cuts at
+           : position. This data is automatically set during
+           : construction.
+
+See Also   : L<_make_standard()|_make_standard>, L<_make_custom()|_make_custom>
+
+=cut
+
+#'
+#---------------
+sub cuts_after { 
+#---------------
+    my $self = shift; 
+    if(@_) { my $num = shift;
+	     if($num == 0 and $num ne '0') {
+		 $self->throw("The cuts_after position be an integer ($num)");
+	     }
+	     $self->{'_cuts_after'} = $num;
+	 }
+    $self->{'_cuts_after'}; 
+}
+
+
+
+=head1 site
+
+ Title     : site
+ Usage     : $re->site();
+ Purpose   : Gets the recognition sequence for the enzyme. 
+ Example   : $seq_string = $re->site();
+ Returns   : String containing recognition sequence indicating 
+           : cleavage site as in  'G^AATTC'.
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : If you want a simple string representing the site without 
+             any '^', use the string() method.
+
+See Also   : L<string()|string>
+
+=cut
+
+#---------
+sub site {
+#---------
+    my $self = shift;
+    my $seq = $self->seq;
+    my $cuts_after = $self->cuts_after;
+    if($cuts_after > 0) {
+	if( $cuts_after >= $seq->length) {
+	    return $seq->seq.'^';
+	} else { 
+	    return $seq->subseq(1, $self->cuts_after).'^'.$seq->subseq($self->cuts_after+1, $seq->length); 
+	}
+    } else {
+        return $seq->seq;
+    }
+}
+
+
+=head1 seq
+
+ Title     : seq
+ Usage     : $re->seq();
+ Purpose   : Get the Bio::PrimarySeq.pm-derived object representing 
+           : the recognition sequence
+ Returns   : String
+ Argument  : n/a
+ Throws    : n/a
+
+See Also   : L<string()|string>, L<revcom()|revcom>
+
+=cut
+
+#---------
+sub seq    {  my $self = shift; $self->{'_seq'}; }
+#---------
+
+
+
+=head1 string
+
+ Title     : string
+ Usage     : $re->string();
+ Purpose   : Get a string representing the recognition sequence.
+ Returns   : String. Does NOT contain a  '^' representing the cut location
+             as returned by the site() method
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : Delegates to the Bio::PrimarySeq-derived object.
+
+See Also   : L<seq()|seq>, L<site()|site>, L<revcom()|revcom>
+
+=cut
+
+#-----------
+sub string {  my $self = shift; $self->{'_seq'}->seq; }
+#-----------
+
+
+
+=head1 revcom
+
+ Title     : revcom
+ Usage     : $re->revcom();
+ Purpose   : Get a string representing the reverse complement of
+           : the recognition sequence.
+ Returns   : String
+ Argument  : n/a
+ Throws    : n/a
+ Comments  : Delegates to the Bio::PrimarySeq.pm-derived object, but needs to
+             get out the string from it, as now Bio::PrimarySeq->revcom makes a
+             Bio::PrimarySeq object
+
+See Also   : L<seq()|seq>, L<string()|string>
+
+=cut
+
+#-----------
+sub revcom {  my $self = shift; $self->{'_seq'}->revcom->seq(); }
+#-----------
+
+
+
+=head1 cut_seq
+
+ Title     : cut_seq
+ Usage     : $re->cut_seq(<sequence object>);
+ Purpose   : Conceptually cut or "digest" a DNA sequence with the given enzyme.
+ Example   : $string = $re->cut_seq(<sequence object>); 
+ Returns   : List of strings containing the resulting fragments.
+ Argument  : Reference to a Bio::PrimarySeq.pm-derived object.
+ Throws    : Exception if argument is not an object.
+           : (Does not yet verify that it is derived from Bio::PrimarySeq.pm.)
+ Comments  : Strategy relies on Perl's built-in split() function.
+           : Since split removes the recognition pattern, the resulting
+           : fragments are repaired after split()-ing.
+           : A side-effect of this is that for sites with ambiguous
+           : recognition sequence (i.e., containing N), the fragments
+           : will contain ambiguity characters instead of AGCT.
+           :
+           : There is currently no support for partial digestions.
+           : There is currently no support for circular sequences.
+           : (This should just involve merging the first and last frag
+           : if $seqObj->is_circular returns true).
+
+=cut
+
+#'
+#-------------
+sub cut_seq {
+#-------------
+    my( $self, $seqObj) = @_;
+    if( !ref($seqObj) || 
+	! $seqObj->isa('Bio::PrimarySeqI') ) {
+	$self->throw( "Can't cut sequence. Missing or invalid object".
+		      "seqObj: $seqObj");
+    }
+
+    my $cuts_after = $self->{'_cuts_after'};
+    my ($site_3prime_seq, $site_5prime_seq);
+    my $reSeq = $self->seq;
+    if($cuts_after == 0) {
+	$site_3prime_seq = '';
+	$site_5prime_seq = $reSeq->seq();
+    } elsif($cuts_after == $reSeq->length) {
+	$site_3prime_seq = $reSeq->seq();
+	$site_5prime_seq = '';
+    } else {
+	$site_3prime_seq = $reSeq->subseq(1, $self->{'_cuts_after'});
+	$site_5prime_seq = $reSeq->subseq($self->{'_cuts_after'}+1, $reSeq->length);
+    }
+
+    $self->debug("3' site: $site_3prime_seq\n5' site: $site_5prime_seq\n");
+
+    my(@re_frags);
+    my $seq = uc $self->_expanded_string;
+
+    if(!$self->palindromic and $self->name ne 'N') {
+	my $revseq = $self->_expanded_string( $reSeq->revcom->seq );
+	$seq .= '|'.uc($revseq);
+    }
+    $self->debug(sprintf("$ID: site seq: %s\n\n", $seq));
+    $self->debug(sprintf("$ID: splitting %s\n\n",$reSeq->seq));
+    @re_frags = split(/$seq/i, $seqObj->seq);
+
+    $self->debug("$ID: cut_seq, ".scalar @re_frags. " fragments.\n");
+
+    ## Re-attach the split recognition site back to the frags
+    ## since perl zapped them in the split() call.
+    my($i);
+    my $numFrags = scalar @re_frags;
+    for($i=0; $i<$numFrags; $i++) {
+        $i < $#re_frags  and $re_frags[$i] = $re_frags[$i].$site_3prime_seq;
+        $i > 0           and $re_frags[$i] = $site_5prime_seq.$re_frags[$i];
+    }
+    @re_frags;
+}
+
+=head1 cut_locations
+
+ Title     : cut_locations
+ Usage     : my $locations = $re->cut_locations(<sequence_object>);
+ Purpose   : Report the location of the recognition site(s) within
+           : an input sequence. 
+ Example   : my $locations = $re->annotate_seq($seqObj);
+ Returns   : Arrayref of starting locations where enzyme would cut 
+ Argument  : Reference to a Bio::PrimarySeqI-derived sequence object.
+ Throws    : n/a
+ Comments  : 
+
+=cut
+
+#-----------------
+sub cut_locations {
+#-----------------
+    my($self, $seqobj) = @_;
+
+    my $site = $self->_expanded_string;
+    my $seq = $seqobj->seq;
+    study($seq);
+    my @locations;
+    while( $seq =~ /($site)/ig ) {
+        # $` is preceding string before pattern so length returns position
+	push @locations, length($`); 	
+    }
+    return \@locations;
+}    
+
+# Purpose : Expand nucleotide ambiguity codes to their representative letters
+# Argument: (optional) the string to be expanded. If not supplied, used
+#           the string returned by $self->string().
+# Returns : String
+sub _expanded_string {
+    my ($self, $str) = @_;
+    
+    $str ||= $self->string;
+
+    if( $self->name ne 'N' ) {
+        $str =~ s/N|X/\./g;
+        $str =~ s/R/\[AG\]/g;
+        $str =~ s/Y/\[CT\]/g;
+        $str =~ s/S/\[GC\]/g;
+        $str =~ s/W/\[AT\]/g;
+        $str =~ s/M/\[AC\]/g;
+        $str =~ s/K/\[TG\]/g;
+        $str =~ s/B/\[CGT\]/g;
+        $str =~ s/D/\[AGT\]/g;
+        $str =~ s/H/\[ACT\]/g;
+        $str =~ s/V/\[ACG\]/g;
+    }
+    return $str;
+}
+
+
+=head1 annotate_seq
+
+ Title     : annotate_seq
+ Usage     : $re->annotate_seq(<sequence_object>);
+ Purpose   : Identify the location of the recognition site(s) within
+           : an input sequence. Uses HTML.
+ Example   : $annot_seq = $re->annotate_seq($seqObj);
+ Returns   : String containing the annotated sequence.
+ Argument  : Reference to a Bio::PrimarySeq.pm-derived sequence object.
+ Throws    : n/a
+ Comments  : The annotated sequence must be viewed with a web
+           : browser to see the location(s) of the recognition site(s).
+
+=cut
+
+#-----------------
+sub annotate_seq {
+#-----------------
+    my($self, $seqObj) = @_;
+
+    my $site = $self->_expanded_string;
+    my $seq = $seqObj->seq;
+
+    $seq =~ s|$site|<b>$site</b>|g;
+    return $seq;
+}    
+
+
+=head1 palindromic
+
+ Title     : palindromic
+ Usage     : $re->palindromic();
+ Purpose   : Determines if the recognition sequence is palindromic
+           : for the current restriction enzyme.
+ Returns   : Boolean
+ Argument  : n/a
+ Throws    : n/a
+ Access    : Public 
+ Comments  : A palindromic site (EcoRI): 5-GAATTC-3
+           :                             3-CTTAAG-5
+
+=cut
+
+#----------------
+sub palindromic {
+#----------------
+    my $self = shift;
+    $self->string eq $self->revcom;
+}
+
+
+
+=head1 is_available
+
+ Title     : is_available
+ Usage     : $re->is_available(<string containing name of enzyme>);
+ Purpose   : Determine if an enzyme is available (to this module).
+           : (see the package lexical %RE).
+ Example   : $re->is_available('EcoRI');
+           : &Bio::Tools::RestrictionEnzyme::is_available($object,'EcoRI');
+ Returns   : Boolean
+ Argument  : String
+ Throws    : n/a
+ Comments  : This method does NOT give information about
+           : commercial availability (yet). 
+           : Enzyme names are CASE SENSITIVE.
+
+See Also   : L<available_list()|available_list>
+
+=cut
+
+#----------------
+sub is_available {
+#----------------
+    my($self,$name) = @_;
+    exists $RE{$name};
+}
+
+#--------------
+sub available {
+#--------------
+    my($self,$name) = @_;
+    print STDERR "\nDeprecated method: $ID:: available(); ".
+	"use is_available() instead.\n";
+    $self->is_available($name);
+}
+
+
+=head2 name
+
+ Title   : name
+ Usage   : $obj->name($newval)
+ Function: 
+ Example : 
+ Returns : value of name
+ Args    : newvalue (optional)
+
+
+=cut
+
+sub name{
+   my ($obj,$value) = @_;
+   if( defined $value) {
+      $obj->{'name'} = $value;
+    }
+    return $obj->{'name'};
+
+}
+
+=head1 available_list
+
+ Title     : available_list
+ Usage     : $re->available_list([<integer>]);
+ Purpose   : Retrieve a list of currently available enzymes.
+ Example   : @all = $re->available_list();  ## All enzymes
+           : @six_cutters = $re->available_list(6);  ## All 6-cutters
+ Returns   : List of strings
+ Argument  : Integer (optional)
+ Throws    : n/a
+ Comments  : This method may be more appropriate for a REData.pm class.
+
+See Also   : L<is_available()|is_available>
+
+=cut
+
+#-------------------
+sub available_list {
+#-------------------
+    my($self,$size) = @_;
+    $size ||= 'all';
+
+    $size eq 'all' and return @RE_available;
+
+    my(@data, @names);
+    foreach (@RE_available) {
+	@data = split /\s/, $RE{$_};
+	if(length $data[0] == $size) {
+	    push @names, $_;
+	}
+    }
+    @names;
+}
+
+1;