diff variant_effect_predictor/Bio/Variation/VariantI.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/Variation/VariantI.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,1053 @@
+# $Id: VariantI.pm,v 1.12 2002/10/22 07:38:49 lapp Exp $
+#
+# BioPerl module for Bio::Variation::VariantI
+#
+# Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
+#
+# Copyright Heikki Lehvaslaiho
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Variation::VariantI - Sequence Change SeqFeature abstract class
+
+=head1 SYNOPSIS
+
+  #get Bio::Variant::VariantI somehow
+  print $var->restriction_changes, "\n";
+  foreach $allele ($var->each_Allele) {
+      #work on Bio::Variation::Allele objects
+  }
+
+=head1 DESCRIPTION
+
+This superclass defines common methods to basic sequence changes.  The
+instantiable classes Bio::Variation::DNAMutation,
+Bio::Variation::RNAChange and Bio::Variation::AAChange use them.
+See L<Bio::Variation::DNAMutation>, L<Bio::Variation::RNAChange>,
+and L<Bio::Variation::AAChange> for more information.
+
+These classes store information, heavy computation to detemine allele
+sequences is done elsewhere.
+
+The database cross-references are implemented as
+Bio::Annotation::DBLink objects. The methods to access them are
+defined in Bio::DBLinkContainerI. See L<Bio::Annotation::DBLink>
+and L<Bio::DBLinkContainerI> for details.
+
+Bio::Variation::VariantI redifines and extends
+Bio::SeqFeature::Generic for sequence variations. This class
+describes specific sequence change events. These events are always
+from a specific reference sequence to something different. See
+L<Bio::SeqFeature::Generic> for more information.
+
+IMPORTANT: The notion of reference sequence permeates all
+Bio::Variation classes. This is especially important to remember when
+dealing with Alleles. In a polymorphic site, there can be a large
+number of alleles. One of then has to be selected to be the reference
+allele (allele_ori). ALL the rest has to be passed to the Variant
+using the method add_Allele, including the mutated allele in a
+canonical mutation. The IO modules and generated attributes depend on
+it. They ignore the allele linked to using allele_mut and circulate
+each Allele returned by each_Allele into allele_mut and calculate
+the changes between that and allele_ori.
+
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to the 
+Bioperl mailing lists  Your participation is much appreciated.
+
+  bioperl-l@bioperl.org                         - General discussion
+  http://bio.perl.org/MailList.html             - About the mailing lists
+
+=head2 Reporting Bugs
+
+report bugs to the Bioperl bug tracking system to help us keep track
+ the bugs and their resolution.  Bug reports can be submitted via
+ email or the web:
+
+  bioperl-bugs@bio.perl.org
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Heikki Lehvaslaiho
+
+Email:  heikki@ebi.ac.uk
+Address: 
+
+     EMBL Outstation, European Bioinformatics Institute
+     Wellcome Trust Genome Campus, Hinxton
+     Cambs. CB10 1SD, United Kingdom 
+
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object
+methods. Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::Variation::VariantI;
+$VERSION=1.0;
+use vars qw(@ISA);
+use strict;
+use  Bio::Root::Root;
+use  Bio::DBLinkContainerI;
+# Object preamble - inheritance
+
+use Bio::SeqFeature::Generic;
+@ISA = qw(Bio::Root::Root Bio::SeqFeature::Generic Bio::DBLinkContainerI );
+
+=head2 id
+
+ Title   : id
+ Usage   : $obj->id
+ Function:
+
+           Read only method. Returns the id of the variation object.
+           The id is the id of the first DBLink object attached to this object.
+
+ Example :
+ Returns : scalar
+ Args    : none
+
+=cut
+
+sub id {
+   my ($self) = @_;
+   my @ids = $self->each_DBLink;
+   my $id = $ids[0] if scalar @ids > 0;
+   return $id->database. "::". $id->primary_id if $id;
+}
+
+
+=head2 add_Allele
+
+ Title   : add_Allele
+ Usage   : $self->add_Allele($allele)
+ Function: 
+
+	    Adds one Bio::Variation::Allele into the list of alleles.
+            Note that the method forces the convention that nucleotide
+            sequence is in lower case and amino acds are in upper
+            case.
+
+ Example : 
+ Returns : 1 when succeeds, 0 for failure.
+ Args    : Allele object
+
+=cut
+
+
+sub add_Allele {
+  my ($self,$value) = @_;
+  if (defined $value) {
+      if( ! $value->isa('Bio::Variation::Allele') ) {
+	  my $com = ref $value;
+	  $self->throw("Is not a Allele object but a  [$com]");
+	  return 0;
+      } else {
+	  if ( $self->isa('Bio::Variation::AAChange') ) {
+	      $value->seq( uc $value->seq) if $value->seq;
+	  } else {
+	      $value->seq( lc $value->seq) if $value->seq;
+	  } 
+	  push(@{$self->{'alleles'}},$value); 
+	  $self->allele_mut($value); #????
+	  return 1;
+      }
+  } else {
+      return 0;
+  }
+}
+
+
+=head2 each_Allele
+
+ Title   : alleles
+ Usage   : $obj->each_Allele();
+ Function: 
+
+	     Returns a list of Bio::Variation::Allele objects
+
+ Example : 
+ Returns : list of Alleles
+ Args    : none
+
+=cut
+
+sub each_Allele{
+   my ($self,@args) = @_;
+   return @{$self->{'alleles'}};
+}
+
+
+ 
+=head2 isMutation
+ 
+ Title   : isMutation
+ Usage   : print join('/', $obj->each_Allele) if not $obj->isMutation;
+ Function:
+
+           Returns or sets the boolean value indicating that the
+           variant descibed is a canonical mutation with two alleles
+           assinged to be the original (wild type) allele and mutated
+           allele, respectively. If this value is not set, it is
+           assumed that the Variant descibes polymorphisms.
+
+ Returns : a boolean
+
+=cut
+
+sub isMutation {
+    my ($self,$value) = @_;
+    if (defined $value) {
+        if ($value ) {
+            $self->{'isMutation'} = 1;
+        } else {
+            $self->{'isMutation'} = 0;
+        }
+    }
+    return $self->{'isMutation'};
+} 
+
+
+=head2 allele_ori
+
+ Title   : allele_ori
+ Usage   : $obj->allele_ori();
+ Function: 
+
+            Links to and returns the Bio::Variation::Allele object.
+            If value is not set, returns false. All other Alleles are
+            compared to this.
+
+            Amino acid sequences are stored in upper case characters,
+            others in lower case.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+See L<Bio::Variation::Allele> for more.
+
+=cut
+
+sub allele_ori {
+   my ($self,$value) = @_;
+   if( defined $value) {
+       if ( ! ref $value || ! $value->isa('Bio::Variation::Allele')) {
+	   $self->throw("Value is not Bio::Variation::Allele but [$value]");
+       } else {
+	   if ( $self->isa('Bio::Variation::AAChange') ) {
+	       $value->seq( uc $value->seq) if $value->seq;
+	   } else {
+	       $value->seq( lc $value->seq) if $value->seq;
+	   } 
+	   $self->{'allele_ori'} = $value;
+       }
+   }
+   return $self->{'allele_ori'};
+}
+
+
+=head2 allele_mut
+
+ Title   : allele_mut
+ Usage   : $obj->allele_mut();
+ Function: 
+
+             Links to and returns the Bio::Variation::Allele
+             object.  Sets and returns the mutated allele sequence.
+             If value is not set, returns false.
+
+             Amino acid sequences are stored in upper case characters,
+             others in lower case.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+See L<Bio::Variation::Allele> for more.
+
+=cut
+
+
+sub allele_mut {
+   my ($self,$value) = @_;
+   if( defined $value) {
+       if ( ! ref $value || ! $value->isa('Bio::Variation::Allele')) {
+	   $self->throw("Value is not Bio::Variation::Allele but [$value]");
+       } else {
+	   if ( $self->isa('Bio::Variation::AAChange') ) {
+	       $value->seq( uc $value->seq) if $value->seq;
+	   } else {
+	       $value->seq( lc $value->seq) if $value->seq;
+	   } 
+	   $self->{'allele_mut'} = $value;
+       }
+   }
+   return $self->{'allele_mut'};
+}
+
+=head2 length
+
+ Title   : length
+ Usage   : $obj->length();
+ Function: 
+
+            Sets and returns the length of the affected original
+            allele sequence.  If value is not set, returns false == 0.
+
+            Value 0 means that the variant position is before the
+            start=end sequence position. (Value 1 would denote a point
+            mutation). This follows the convension to report an
+            insertion (2insT) in equivalent way to a corresponding
+            deletion (2delT) (Think about indel polymorpism ATC <=> AC
+            where the origianal state is not known ).
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+
+sub length {
+   my ($self,$value) = @_;
+   if ( defined $value) {
+       $self->{'length'} = $value;
+  }
+   if ( ! exists $self->{'length'} ) {
+       return 0;
+   } 
+   return $self->{'length'};
+}
+
+=head2 upStreamSeq
+
+ Title   : upStreamSeq
+ Usage   : $obj->upStreamSeq();
+ Function: 
+
+            Sets and returns upstream flanking sequence string.  If
+            value is not set, returns false. The sequence should be
+            >=25 characters long, if possible.
+
+ Example : 
+ Returns : string or false
+ Args    : string
+
+=cut
+
+
+sub upStreamSeq {
+    my ($self,$value) = @_;
+    if( defined $value) {
+	$self->{'upstreamseq'} = $value;
+    }
+   return $self->{'upstreamseq'};
+}
+
+
+=head2 dnStreamSeq
+
+ Title   : dnStreamSeq
+ Usage   : $obj->dnStreamSeq();
+ Function: 
+
+            Sets and returns dnstream flanking sequence string.  If
+            value is not set, returns false. The sequence should be
+            >=25 characters long, if possible.
+
+ Example : 
+ Returns : string or false
+ Args    : string
+
+=cut
+
+
+sub dnStreamSeq {
+    my ($self,$value) = @_;
+    if( defined $value) {
+	$self->{'dnstreamseq'} = $value;
+    }
+    return $self->{'dnstreamseq'};
+    
+}
+
+
+=head2 label
+
+ Title   : label
+ Usage   : $obj->label();
+ Function: 
+
+            Sets and returns mutation event label(s).  If value is not
+            set, or no argument is given returns false.  Each
+            instantiable class needs to implement this method. Valid
+            values are listed in 'Mutation event controlled vocabulary' in
+            http://www.ebi.ac.uk/mutations/recommendations/mutevent.html.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+
+sub label {
+    my ($self,$value) = @_;
+    $self->throw("[$self] has not implemeted method 'label'");
+}
+
+
+
+=head2 status
+
+ Title   : status
+ Usage   : $obj->status()
+ Function: 
+
+           Returns the status of the sequence change object.
+           Valid values are: 'suspected' and 'proven'
+
+ Example : $obj->status('proven');
+ Returns : scalar
+ Args    : valid string (optional, for setting)
+
+
+=cut
+
+
+sub status {
+   my ($self,$value) = @_;
+   my %status = (suspected => 1,
+		 proven => 1
+		 );
+
+   if( defined $value) {
+       $value = lc $value;
+       if ($status{$value}) {
+	   $self->{'status'} = $value;
+       } 
+       else {
+	   $self->throw("$value is not valid status value!");
+       }
+    }
+   if( ! exists $self->{'status'} ) {
+       return "$self";
+   }
+   return $self->{'status'};
+}
+
+
+=head2 proof
+
+ Title   : proof
+ Usage   : $obj->proof()
+ Function: 
+
+           Returns the proof of the sequence change object.
+           Valid values are: 'computed' and 'experimental'.
+
+ Example : $obj->proof('computed');
+ Returns : scalar
+ Args    : valid string (optional, for setting)
+
+
+=cut
+
+
+sub proof {
+    my ($self,$value) = @_;
+    my %proof = (computed => 1,
+		 experimental => 1
+		 );
+
+    if( defined $value) {
+	$value = lc $value;
+	if ($proof{$value}) {
+	    $self->{'proof'} = $value;
+	} else {
+	    $self->throw("$value is not valid proof value!");
+	}
+    }
+    return $self->{'proof'};
+}
+
+
+=head2 region
+
+ Title   : region
+ Usage   : $obj->region();
+ Function: 
+
+            Sets and returns the name of the sequence region type or
+            protein domain at this location.  If value is not set,
+            returns false.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+
+sub region {
+    my ($self,$value) = @_;
+    if( defined $value) {
+	$self->{'region'} = $value;
+    }
+    return $self->{'region'};
+}
+
+
+=head2 region_value
+
+ Title   : region_value
+ Usage   : $obj->region_value();
+ Function: 
+
+            Sets and returns the name of the sequence region_value or
+            protein domain at this location.  If value is not set,
+            returns false.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+
+sub region_value {
+    my ($self,$value) = @_;
+    if( defined $value) {
+	$self->{'region_value'} = $value;
+    }
+    return $self->{'region_value'};
+}
+
+=head2 region_dist
+
+ Title   : region_dist
+ Usage   : $obj->region_dist();
+ Function: 
+
+            Sets and returns the distance tot the closest region
+            (i.e. intro/exon or domain) boundary. If distance is not
+            set, returns false.
+
+ Example : 
+ Returns : integer
+ Args    : integer
+
+=cut
+
+
+sub region_dist {
+    my ($self,$value) = @_;
+    if( defined $value) {
+       if (  not $value =~ /^[+-]?\d+$/ ) {
+	   $self->throw("[$value] for region_dist has to be an integer\n");
+        } else {
+	    $self->{'region_dist'} = $value;
+        }
+    }
+    return $self->{'region_dist'};
+}
+
+
+=head2 numbering
+
+ Title   : numbering
+ Usage   : $obj->numbering()
+ Function: 
+
+           Returns the numbering chema used locating sequnce features.
+           Valid values are: 'entry' and 'coding'
+
+ Example : $obj->numbering('coding');
+ Returns : scalar
+ Args    : valid string (optional, for setting)
+
+
+=cut
+
+
+sub numbering {
+   my ($self,$value) = @_;
+   my %numbering = (entry => 1,
+		    coding => 1
+		    );
+
+   if( defined $value) {
+       $value = lc $value;
+       if ($numbering{$value}) {
+	   $self->{'numbering'} = $value;
+       } 
+       else {
+	   $self->throw("'$value' is not a valid for numbering!");
+       }
+    }
+   if( ! exists $self->{'numbering'} ) {
+       return "$self";
+   }
+   return $self->{'numbering'};
+}
+
+=head2 mut_number
+
+ Title   : mut_number
+ Usage   : $num = $obj->mut_number;
+         : $num = $obj->mut_number($number);
+ Function: 
+
+           Returns or sets the number identifying the order in which the
+           mutation has been issued. Numbers shouldstart from 1.
+           If the number has never been set, the method will return ''
+
+           If you want the output from IO modules look nice and, for
+           multivariant/allele variations, make sense you better set
+           this attribute.
+
+ Returns : an integer
+
+=cut
+
+
+sub mut_number {
+    my ($self,$value) = @_;
+    if (defined $value) {
+	$self->{'mut_number'} = $value;
+    }
+    unless (exists $self->{'mut_number'}) {
+	return ('');
+    } else {
+	return $self->{'mut_number'};
+    }
+}       
+
+
+=head2 SeqDiff
+
+ Title   : SeqDiff
+ Usage   : $mutobj = $obj->SeqDiff;
+         : $mutobj = $obj->SeqDiff($objref);
+ Function: 
+
+           Returns or sets the link-reference to the umbrella
+           Bio::Variation::SeqDiff object.  If there is no link,
+           it will return undef
+
+           Note: Adding a variant into a SeqDiff object will
+           automatically set this value.
+
+ Returns : an obj_ref or undef
+
+See L<Bio::Variation::SeqDiff> for more information.
+
+=cut
+
+sub SeqDiff {
+    my ($self,$value) = @_;
+    if (defined $value) {
+	if( ! $value->isa('Bio::Variation::SeqDiff') ) {
+	    $self->throw("Is not a Bio::Variation::SeqDiff object but a [$value]");
+	    return (undef);
+	}
+	else {
+	    $self->{'seqDiff'} = $value;
+	}
+    }
+    unless (exists $self->{'seqDiff'}) {
+	return (undef);
+    } else {
+	return $self->{'seqDiff'};
+    }
+}
+
+=head2 add_DBLink
+
+ Title   : add_DBLink
+ Usage   : $self->add_DBLink($ref)
+ Function: adds a link object
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+
+sub add_DBLink{
+   my ($self,$com) = @_;
+   if( $com && ! $com->isa('Bio::Annotation::DBLink') ) {
+       $self->throw("Is not a link object but a  [$com]");
+   }
+   $com && push(@{$self->{'link'}},$com);
+}
+
+=head2 each_DBLink
+
+ Title   : each_DBLink
+ Usage   : foreach $ref ( $self->each_DBlink() )
+ Function: gets an array of DBlink of objects
+ Example :
+ Returns : 
+ Args    :
+
+
+=cut
+
+sub each_DBLink{
+   my ($self) = @_;
+   
+   return @{$self->{'link'}}; 
+}
+
+=head2 restriction_changes
+
+ Title   : restriction_changes
+ Usage   : $obj->restriction_changes();
+ Function: 
+
+            Returns a string containing a list of restriction
+            enzyme changes of form +EcoRI, separated by
+            commas. Strings need to be valid restriction enzyme names
+            as stored in REBASE. allele_ori and allele_mut need to be assigned.
+
+ Example : 
+ Returns : string
+ Args    : string
+
+=cut
+
+sub restriction_changes { 
+    my ($self) = @_;
+
+    if (not $self->{'re_changes'}) { 
+	my %re = &_enzymes;
+	
+	# complain if used on AA data
+	if ($self->isa('Bio::Variation::AAChange')) {
+	    $self->throw('Restriction enzymes do not bite polypeptides!');
+	}
+	
+	#sanity checks
+	$self->warn('Upstream sequence is empty!')
+	    if $self->upStreamSeq eq '';
+	$self->warn('Downstream sequence is empty!')
+	    if $self->dnStreamSeq eq '';
+#	 $self->warn('Original allele sequence is empty!')
+#	     if $self->allele_ori eq '';
+#	 $self->warn('Mutated allele sequence is empty!')
+#	     if $self->allele_mut eq '';
+	
+	#reuse the non empty DNA level list at RNA level if the flanks are identical
+	#Hint: Check DNAMutation object first
+	if ($self->isa('Bio::Variation::RNAChange') and  $self->DNAMutation and
+	    $self->upStreamSeq eq $self->DNAMutation->upStreamSeq  and 
+	    $self->dnStreamSeq eq $self->DNAMutation->dnStreamSeq and
+	    $self->DNAMutation->restriction_changes ne '' ) {
+	    $self->{'re_changes'} = $self->DNAMutation->restriction_changes;
+	} else {
+	    
+	    #maximum length of a type II restriction site in the current REBASE
+	    my ($le_dn) = 15; 
+	    my ($le_up) = $le_dn;
+
+	    #reduce the flank lengths if the desired length is not available
+	    $le_dn = CORE::length ($self->dnStreamSeq) if $le_dn > CORE::length ($self->dnStreamSeq);
+	    $le_up = CORE::length ($self->upStreamSeq) if $le_up > CORE::length ($self->upStreamSeq);
+
+	    #Build sequence strings to compare
+	    my ($oriseq, $mutseq);    
+	    $oriseq  = $mutseq = substr($self->upStreamSeq, -$le_up, $le_up);
+	    $oriseq .= $self->allele_ori->seq if $self->allele_ori->seq;
+	    $mutseq .= $self->allele_mut->seq if $self->allele_mut->seq;
+	    $oriseq .= substr($self->dnStreamSeq, 0, $le_dn);
+	    $mutseq .= substr($self->dnStreamSeq, 0, $le_dn);
+	    
+	    # ... and their reverse complements
+	    my $oriseq_rev = _revcompl ($oriseq);
+	    my $mutseq_rev = _revcompl ($mutseq);
+
+	    # collect results into a string
+	    my $rec = '';
+	    foreach my $enz (sort keys (%re)) {
+		my $site = $re{$enz};
+		my @ori = ($oriseq=~ /$site/g);
+		my @mut = ($mutseq=~ /$site/g);
+		my @ori_r = ($oriseq_rev =~ /$site/g);
+		my @mut_r = ($mutseq_rev =~ /$site/g);
+		
+		$rec .= '+'. $enz. ", " 
+		    if (scalar @ori < scalar @mut) or (scalar @ori_r < scalar @mut_r);
+		$rec .= '-'. $enz. ", " 		    
+		    if (scalar @ori > scalar @mut) or (scalar @ori_r > scalar @mut_r);
+		
+	    }
+	    $rec = substr($rec, 0, CORE::length($rec) - 2) if $rec ne '';
+	    $self->{'re_changes'} =  $rec;
+	}
+    }
+    return $self->{'re_changes'}
+}
+
+
+sub _revcompl { 
+    # side effect: lower case letters
+    my ($seq) = shift;
+
+    $seq = lc $seq;
+    $seq =~ tr/acgtrymkswhbvdnx/tgcayrkmswdvbhnx/;
+    return CORE::reverse $seq;
+}
+
+
+sub _enzymes {
+ #REBASE version 005   type2.005
+ my %enzymes =  (
+         'AarI' => 'cacctgc',
+         'AatII' => 'gacgtc',
+         'AccI' => 'gt[ac][gt]ac',
+         'AceIII' => 'cagctc',
+         'AciI' => 'ccgc',
+         'AclI' => 'aacgtt',
+         'AcyI' => 'g[ag]cg[ct]c',
+         'AflII' => 'cttaag',
+         'AflIII' => 'ac[ag][ct]gt',
+         'AgeI' => 'accggt',
+         'AhaIII' => 'tttaaa',
+         'AloI' => 'gaac[acgt][acgt][acgt][acgt][acgt][acgt]tcc',
+         'AluI' => 'agct',
+         'AlwNI' => 'cag[acgt][acgt][acgt]ctg',
+         'ApaBI' => 'gca[acgt][acgt][acgt][acgt][acgt]tgc',
+         'ApaI' => 'gggccc',
+         'ApaLI' => 'gtgcac',
+         'ApoI' => '[ag]aatt[ct]',
+         'AscI' => 'ggcgcgcc',
+         'AsuI' => 'gg[acgt]cc',
+         'AsuII' => 'ttcgaa',
+         'AvaI' => 'c[ct]cg[ag]g',
+         'AvaII' => 'gg[at]cc',
+         'AvaIII' => 'atgcat',
+         'AvrII' => 'cctagg',
+         'BaeI' => 'ac[acgt][acgt][acgt][acgt]gta[ct]c',
+         'BalI' => 'tggcca',
+         'BamHI' => 'ggatcc',
+         'BbvCI' => 'cctcagc',
+         'BbvI' => 'gcagc',
+         'BbvII' => 'gaagac',
+         'BccI' => 'ccatc',
+         'Bce83I' => 'cttgag',
+         'BcefI' => 'acggc',
+         'BcgI' => 'cga[acgt][acgt][acgt][acgt][acgt][acgt]tgc',
+         'BciVI' => 'gtatcc',
+         'BclI' => 'tgatca',
+         'BetI' => '[at]ccgg[at]',
+         'BfiI' => 'actggg',
+         'BglI' => 'gcc[acgt][acgt][acgt][acgt][acgt]ggc',
+         'BglII' => 'agatct',
+         'BinI' => 'ggatc',
+         'BmgI' => 'g[gt]gccc',
+         'BplI' => 'gag[acgt][acgt][acgt][acgt][acgt]ctc',
+         'Bpu10I' => 'cct[acgt]agc',
+         'BsaAI' => '[ct]acgt[ag]',
+         'BsaBI' => 'gat[acgt][acgt][acgt][acgt]atc',
+         'BsaXI' => 'ac[acgt][acgt][acgt][acgt][acgt]ctcc',
+         'BsbI' => 'caacac',
+         'BscGI' => 'cccgt',
+         'BseMII' => 'ctcag',
+         'BsePI' => 'gcgcgc',
+         'BseRI' => 'gaggag',
+         'BseSI' => 'g[gt]gc[ac]c',
+         'BsgI' => 'gtgcag',
+         'BsiI' => 'cacgag',
+         'BsiYI' => 'cc[acgt][acgt][acgt][acgt][acgt][acgt][acgt]gg',
+         'BsmAI' => 'gtctc',
+         'BsmI' => 'gaatgc',
+         'Bsp1407I' => 'tgtaca',
+         'Bsp24I' => 'gac[acgt][acgt][acgt][acgt][acgt][acgt]tgg',
+         'BspGI' => 'ctggac',
+         'BspHI' => 'tcatga',
+         'BspLU11I' => 'acatgt',
+         'BspMI' => 'acctgc',
+         'BspMII' => 'tccgga',
+         'BsrBI' => 'ccgctc',
+         'BsrDI' => 'gcaatg',
+         'BsrI' => 'actgg',
+         'BstEII' => 'ggt[acgt]acc',
+         'BstXI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt]tgg',
+         'BtrI' => 'cacgtc',
+         'BtsI' => 'gcagtg',
+         'Cac8I' => 'gc[acgt][acgt]gc',
+         'CauII' => 'cc[cg]gg',
+         'Cfr10I' => '[ag]ccgg[ct]',
+         'CfrI' => '[ct]ggcc[ag]',
+         'CjeI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt]gt',
+         'CjePI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt][acgt]tc',
+         'ClaI' => 'atcgat',
+         'CviJI' => '[ag]gc[ct]',
+         'CviRI' => 'tgca',
+         'DdeI' => 'ct[acgt]ag',
+         'DpnI' => 'gatc',
+         'DraII' => '[ag]gg[acgt]cc[ct]',
+         'DraIII' => 'cac[acgt][acgt][acgt]gtg',
+         'DrdI' => 'gac[acgt][acgt][acgt][acgt][acgt][acgt]gtc',
+         'DrdII' => 'gaacca',
+         'DsaI' => 'cc[ag][ct]gg',
+         'Eam1105I' => 'gac[acgt][acgt][acgt][acgt][acgt]gtc',
+         'EciI' => 'ggcgga',
+         'Eco31I' => 'ggtctc',
+         'Eco47III' => 'agcgct',
+         'Eco57I' => 'ctgaag',
+         'EcoNI' => 'cct[acgt][acgt][acgt][acgt][acgt]agg',
+         'EcoRI' => 'gaattc',
+         'EcoRII' => 'cc[at]gg',
+         'EcoRV' => 'gatatc',
+         'Esp3I' => 'cgtctc',
+         'EspI' => 'gct[acgt]agc',
+         'FauI' => 'cccgc',
+         'FinI' => 'gggac',
+         'Fnu4HI' => 'gc[acgt]gc',
+         'FnuDII' => 'cgcg',
+         'FokI' => 'ggatg',
+         'FseI' => 'ggccggcc',
+         'GdiII' => 'cggcc[ag]',
+         'GsuI' => 'ctggag',
+         'HaeI' => '[at]ggcc[at]',
+         'HaeII' => '[ag]gcgc[ct]',
+         'HaeIII' => 'ggcc',
+         'HaeIV' => 'ga[ct][acgt][acgt][acgt][acgt][acgt][ag]tc',
+         'HgaI' => 'gacgc',
+         'HgiAI' => 'g[at]gc[at]c',
+         'HgiCI' => 'gg[ct][ag]cc',
+         'HgiEII' => 'acc[acgt][acgt][acgt][acgt][acgt][acgt]ggt',
+         'HgiJII' => 'g[ag]gc[ct]c',
+         'HhaI' => 'gcgc',
+         'Hin4I' => 'ga[cgt][acgt][acgt][acgt][acgt][acgt][acg]tc',
+         'HindII' => 'gt[ct][ag]ac',
+         'HindIII' => 'aagctt',
+         'HinfI' => 'ga[acgt]tc',
+         'HpaI' => 'gttaac',
+         'HpaII' => 'ccgg',
+         'HphI' => 'ggtga',
+         'Hpy178III' => 'tc[acgt][acgt]ga',
+         'Hpy188I' => 'tc[acgt]ga',
+         'Hpy99I' => 'cg[at]cg',
+         'KpnI' => 'ggtacc',
+         'Ksp632I' => 'ctcttc',
+         'MaeI' => 'ctag',
+         'MaeII' => 'acgt',
+         'MaeIII' => 'gt[acgt]ac',
+         'MboI' => 'gatc',
+         'MboII' => 'gaaga',
+         'McrI' => 'cg[ag][ct]cg',
+         'MfeI' => 'caattg',
+         'MjaIV' => 'gt[acgt][acgt]ac',
+         'MluI' => 'acgcgt',
+         'MmeI' => 'tcc[ag]ac',
+         'MnlI' => 'cctc',
+         'MseI' => 'ttaa',
+         'MslI' => 'ca[ct][acgt][acgt][acgt][acgt][ag]tg',
+         'MstI' => 'tgcgca',
+         'MwoI' => 'gc[acgt][acgt][acgt][acgt][acgt][acgt][acgt]gc',
+         'NaeI' => 'gccggc',
+         'NarI' => 'ggcgcc',
+         'NcoI' => 'ccatgg',
+         'NdeI' => 'catatg',
+         'NheI' => 'gctagc',
+         'NlaIII' => 'catg',
+         'NlaIV' => 'gg[acgt][acgt]cc',
+         'NotI' => 'gcggccgc',
+         'NruI' => 'tcgcga',
+         'NspBII' => 'c[ac]gc[gt]g',
+         'NspI' => '[ag]catg[ct]',
+         'PacI' => 'ttaattaa',
+         'Pfl1108I' => 'tcgtag',
+         'PflMI' => 'cca[acgt][acgt][acgt][acgt][acgt]tgg',
+         'PleI' => 'gagtc',
+         'PmaCI' => 'cacgtg',
+         'PmeI' => 'gtttaaac',
+         'PpiI' => 'gaac[acgt][acgt][acgt][acgt][acgt]ctc',
+         'PpuMI' => '[ag]gg[at]cc[ct]',
+         'PshAI' => 'gac[acgt][acgt][acgt][acgt]gtc',
+         'PsiI' => 'ttataa',
+         'PstI' => 'ctgcag',
+         'PvuI' => 'cgatcg',
+         'PvuII' => 'cagctg',
+         'RleAI' => 'cccaca',
+         'RsaI' => 'gtac',
+         'RsrII' => 'cgg[at]ccg',
+         'SacI' => 'gagctc',
+         'SacII' => 'ccgcgg',
+         'SalI' => 'gtcgac',
+         'SanDI' => 'ggg[at]ccc',
+         'SapI' => 'gctcttc',
+         'SauI' => 'cct[acgt]agg',
+         'ScaI' => 'agtact',
+         'ScrFI' => 'cc[acgt]gg',
+         'SduI' => 'g[agt]gc[act]c',
+         'SecI' => 'cc[acgt][acgt]gg',
+         'SexAI' => 'acc[at]ggt',
+         'SfaNI' => 'gcatc',
+         'SfeI' => 'ct[ag][ct]ag',
+         'SfiI' => 'ggcc[acgt][acgt][acgt][acgt][acgt]ggcc',
+         'SgfI' => 'gcgatcgc',
+         'SgrAI' => 'c[ag]ccgg[ct]g',
+         'SimI' => 'gggtc',
+         'SmaI' => 'cccggg',
+         'SmlI' => 'ct[ct][ag]ag',
+         'SnaBI' => 'tacgta',
+         'SnaI' => 'gtatac',
+         'SpeI' => 'actagt',
+         'SphI' => 'gcatgc',
+         'SplI' => 'cgtacg',
+         'SrfI' => 'gcccgggc',
+         'Sse232I' => 'cgccggcg',
+         'Sse8387I' => 'cctgcagg',
+         'Sse8647I' => 'agg[at]cct',
+         'SspI' => 'aatatt',
+         'Sth132I' => 'cccg',
+         'StuI' => 'aggcct',
+         'StyI' => 'cc[at][at]gg',
+         'SwaI' => 'atttaaat',
+         'TaqI' => 'tcga',
+         'TaqII' => 'gaccga',
+         'TatI' => '[at]gtac[at]',
+         'TauI' => 'gc[cg]gc',
+         'TfiI' => 'ga[at]tc',
+         'TseI' => 'gc[at]gc',
+         'Tsp45I' => 'gt[cg]ac',
+         'Tsp4CI' => 'ac[acgt]gt',
+         'TspEI' => 'aatt',
+         'TspRI' => 'ca[cg]tg[acgt][acgt]',
+         'Tth111I' => 'gac[acgt][acgt][acgt]gtc',
+         'Tth111II' => 'caa[ag]ca',
+         'UbaGI' => 'cac[acgt][acgt][acgt][acgt]gtg',
+         'UbaPI' => 'cgaacg',
+         'VspI' => 'attaat',
+         'XbaI' => 'tctaga',
+         'XcmI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt][acgt][acgt][acgt]tgg',
+         'XhoI' => 'ctcgag',
+         'XhoII' => '[ag]gatc[ct]',
+         'XmaIII' => 'cggccg',
+         'XmnI' => 'gaa[acgt][acgt][acgt][acgt]ttc'
+        );
+
+    return %enzymes;
+}
+
+1;