comparison variant_effect_predictor/Bio/Variation/VariantI.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1 # $Id: VariantI.pm,v 1.12 2002/10/22 07:38:49 lapp Exp $
2 #
3 # BioPerl module for Bio::Variation::VariantI
4 #
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
6 #
7 # Copyright Heikki Lehvaslaiho
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Variation::VariantI - Sequence Change SeqFeature abstract class
16
17 =head1 SYNOPSIS
18
19 #get Bio::Variant::VariantI somehow
20 print $var->restriction_changes, "\n";
21 foreach $allele ($var->each_Allele) {
22 #work on Bio::Variation::Allele objects
23 }
24
25 =head1 DESCRIPTION
26
27 This superclass defines common methods to basic sequence changes. The
28 instantiable classes Bio::Variation::DNAMutation,
29 Bio::Variation::RNAChange and Bio::Variation::AAChange use them.
30 See L<Bio::Variation::DNAMutation>, L<Bio::Variation::RNAChange>,
31 and L<Bio::Variation::AAChange> for more information.
32
33 These classes store information, heavy computation to detemine allele
34 sequences is done elsewhere.
35
36 The database cross-references are implemented as
37 Bio::Annotation::DBLink objects. The methods to access them are
38 defined in Bio::DBLinkContainerI. See L<Bio::Annotation::DBLink>
39 and L<Bio::DBLinkContainerI> for details.
40
41 Bio::Variation::VariantI redifines and extends
42 Bio::SeqFeature::Generic for sequence variations. This class
43 describes specific sequence change events. These events are always
44 from a specific reference sequence to something different. See
45 L<Bio::SeqFeature::Generic> for more information.
46
47 IMPORTANT: The notion of reference sequence permeates all
48 Bio::Variation classes. This is especially important to remember when
49 dealing with Alleles. In a polymorphic site, there can be a large
50 number of alleles. One of then has to be selected to be the reference
51 allele (allele_ori). ALL the rest has to be passed to the Variant
52 using the method add_Allele, including the mutated allele in a
53 canonical mutation. The IO modules and generated attributes depend on
54 it. They ignore the allele linked to using allele_mut and circulate
55 each Allele returned by each_Allele into allele_mut and calculate
56 the changes between that and allele_ori.
57
58
59 =head1 FEEDBACK
60
61 =head2 Mailing Lists
62
63 User feedback is an integral part of the evolution of this and other
64 Bioperl modules. Send your comments and suggestions preferably to the
65 Bioperl mailing lists Your participation is much appreciated.
66
67 bioperl-l@bioperl.org - General discussion
68 http://bio.perl.org/MailList.html - About the mailing lists
69
70 =head2 Reporting Bugs
71
72 report bugs to the Bioperl bug tracking system to help us keep track
73 the bugs and their resolution. Bug reports can be submitted via
74 email or the web:
75
76 bioperl-bugs@bio.perl.org
77 http://bugzilla.bioperl.org/
78
79 =head1 AUTHOR - Heikki Lehvaslaiho
80
81 Email: heikki@ebi.ac.uk
82 Address:
83
84 EMBL Outstation, European Bioinformatics Institute
85 Wellcome Trust Genome Campus, Hinxton
86 Cambs. CB10 1SD, United Kingdom
87
88
89 =head1 APPENDIX
90
91 The rest of the documentation details each of the object
92 methods. Internal methods are usually preceded with a _
93
94 =cut
95
96
97 # Let the code begin...
98
99
100 package Bio::Variation::VariantI;
101 $VERSION=1.0;
102 use vars qw(@ISA);
103 use strict;
104 use Bio::Root::Root;
105 use Bio::DBLinkContainerI;
106 # Object preamble - inheritance
107
108 use Bio::SeqFeature::Generic;
109 @ISA = qw(Bio::Root::Root Bio::SeqFeature::Generic Bio::DBLinkContainerI );
110
111 =head2 id
112
113 Title : id
114 Usage : $obj->id
115 Function:
116
117 Read only method. Returns the id of the variation object.
118 The id is the id of the first DBLink object attached to this object.
119
120 Example :
121 Returns : scalar
122 Args : none
123
124 =cut
125
126 sub id {
127 my ($self) = @_;
128 my @ids = $self->each_DBLink;
129 my $id = $ids[0] if scalar @ids > 0;
130 return $id->database. "::". $id->primary_id if $id;
131 }
132
133
134 =head2 add_Allele
135
136 Title : add_Allele
137 Usage : $self->add_Allele($allele)
138 Function:
139
140 Adds one Bio::Variation::Allele into the list of alleles.
141 Note that the method forces the convention that nucleotide
142 sequence is in lower case and amino acds are in upper
143 case.
144
145 Example :
146 Returns : 1 when succeeds, 0 for failure.
147 Args : Allele object
148
149 =cut
150
151
152 sub add_Allele {
153 my ($self,$value) = @_;
154 if (defined $value) {
155 if( ! $value->isa('Bio::Variation::Allele') ) {
156 my $com = ref $value;
157 $self->throw("Is not a Allele object but a [$com]");
158 return 0;
159 } else {
160 if ( $self->isa('Bio::Variation::AAChange') ) {
161 $value->seq( uc $value->seq) if $value->seq;
162 } else {
163 $value->seq( lc $value->seq) if $value->seq;
164 }
165 push(@{$self->{'alleles'}},$value);
166 $self->allele_mut($value); #????
167 return 1;
168 }
169 } else {
170 return 0;
171 }
172 }
173
174
175 =head2 each_Allele
176
177 Title : alleles
178 Usage : $obj->each_Allele();
179 Function:
180
181 Returns a list of Bio::Variation::Allele objects
182
183 Example :
184 Returns : list of Alleles
185 Args : none
186
187 =cut
188
189 sub each_Allele{
190 my ($self,@args) = @_;
191 return @{$self->{'alleles'}};
192 }
193
194
195
196 =head2 isMutation
197
198 Title : isMutation
199 Usage : print join('/', $obj->each_Allele) if not $obj->isMutation;
200 Function:
201
202 Returns or sets the boolean value indicating that the
203 variant descibed is a canonical mutation with two alleles
204 assinged to be the original (wild type) allele and mutated
205 allele, respectively. If this value is not set, it is
206 assumed that the Variant descibes polymorphisms.
207
208 Returns : a boolean
209
210 =cut
211
212 sub isMutation {
213 my ($self,$value) = @_;
214 if (defined $value) {
215 if ($value ) {
216 $self->{'isMutation'} = 1;
217 } else {
218 $self->{'isMutation'} = 0;
219 }
220 }
221 return $self->{'isMutation'};
222 }
223
224
225 =head2 allele_ori
226
227 Title : allele_ori
228 Usage : $obj->allele_ori();
229 Function:
230
231 Links to and returns the Bio::Variation::Allele object.
232 If value is not set, returns false. All other Alleles are
233 compared to this.
234
235 Amino acid sequences are stored in upper case characters,
236 others in lower case.
237
238 Example :
239 Returns : string
240 Args : string
241
242 See L<Bio::Variation::Allele> for more.
243
244 =cut
245
246 sub allele_ori {
247 my ($self,$value) = @_;
248 if( defined $value) {
249 if ( ! ref $value || ! $value->isa('Bio::Variation::Allele')) {
250 $self->throw("Value is not Bio::Variation::Allele but [$value]");
251 } else {
252 if ( $self->isa('Bio::Variation::AAChange') ) {
253 $value->seq( uc $value->seq) if $value->seq;
254 } else {
255 $value->seq( lc $value->seq) if $value->seq;
256 }
257 $self->{'allele_ori'} = $value;
258 }
259 }
260 return $self->{'allele_ori'};
261 }
262
263
264 =head2 allele_mut
265
266 Title : allele_mut
267 Usage : $obj->allele_mut();
268 Function:
269
270 Links to and returns the Bio::Variation::Allele
271 object. Sets and returns the mutated allele sequence.
272 If value is not set, returns false.
273
274 Amino acid sequences are stored in upper case characters,
275 others in lower case.
276
277 Example :
278 Returns : string
279 Args : string
280
281 See L<Bio::Variation::Allele> for more.
282
283 =cut
284
285
286 sub allele_mut {
287 my ($self,$value) = @_;
288 if( defined $value) {
289 if ( ! ref $value || ! $value->isa('Bio::Variation::Allele')) {
290 $self->throw("Value is not Bio::Variation::Allele but [$value]");
291 } else {
292 if ( $self->isa('Bio::Variation::AAChange') ) {
293 $value->seq( uc $value->seq) if $value->seq;
294 } else {
295 $value->seq( lc $value->seq) if $value->seq;
296 }
297 $self->{'allele_mut'} = $value;
298 }
299 }
300 return $self->{'allele_mut'};
301 }
302
303 =head2 length
304
305 Title : length
306 Usage : $obj->length();
307 Function:
308
309 Sets and returns the length of the affected original
310 allele sequence. If value is not set, returns false == 0.
311
312 Value 0 means that the variant position is before the
313 start=end sequence position. (Value 1 would denote a point
314 mutation). This follows the convension to report an
315 insertion (2insT) in equivalent way to a corresponding
316 deletion (2delT) (Think about indel polymorpism ATC <=> AC
317 where the origianal state is not known ).
318
319 Example :
320 Returns : string
321 Args : string
322
323 =cut
324
325
326 sub length {
327 my ($self,$value) = @_;
328 if ( defined $value) {
329 $self->{'length'} = $value;
330 }
331 if ( ! exists $self->{'length'} ) {
332 return 0;
333 }
334 return $self->{'length'};
335 }
336
337 =head2 upStreamSeq
338
339 Title : upStreamSeq
340 Usage : $obj->upStreamSeq();
341 Function:
342
343 Sets and returns upstream flanking sequence string. If
344 value is not set, returns false. The sequence should be
345 >=25 characters long, if possible.
346
347 Example :
348 Returns : string or false
349 Args : string
350
351 =cut
352
353
354 sub upStreamSeq {
355 my ($self,$value) = @_;
356 if( defined $value) {
357 $self->{'upstreamseq'} = $value;
358 }
359 return $self->{'upstreamseq'};
360 }
361
362
363 =head2 dnStreamSeq
364
365 Title : dnStreamSeq
366 Usage : $obj->dnStreamSeq();
367 Function:
368
369 Sets and returns dnstream flanking sequence string. If
370 value is not set, returns false. The sequence should be
371 >=25 characters long, if possible.
372
373 Example :
374 Returns : string or false
375 Args : string
376
377 =cut
378
379
380 sub dnStreamSeq {
381 my ($self,$value) = @_;
382 if( defined $value) {
383 $self->{'dnstreamseq'} = $value;
384 }
385 return $self->{'dnstreamseq'};
386
387 }
388
389
390 =head2 label
391
392 Title : label
393 Usage : $obj->label();
394 Function:
395
396 Sets and returns mutation event label(s). If value is not
397 set, or no argument is given returns false. Each
398 instantiable class needs to implement this method. Valid
399 values are listed in 'Mutation event controlled vocabulary' in
400 http://www.ebi.ac.uk/mutations/recommendations/mutevent.html.
401
402 Example :
403 Returns : string
404 Args : string
405
406 =cut
407
408
409 sub label {
410 my ($self,$value) = @_;
411 $self->throw("[$self] has not implemeted method 'label'");
412 }
413
414
415
416 =head2 status
417
418 Title : status
419 Usage : $obj->status()
420 Function:
421
422 Returns the status of the sequence change object.
423 Valid values are: 'suspected' and 'proven'
424
425 Example : $obj->status('proven');
426 Returns : scalar
427 Args : valid string (optional, for setting)
428
429
430 =cut
431
432
433 sub status {
434 my ($self,$value) = @_;
435 my %status = (suspected => 1,
436 proven => 1
437 );
438
439 if( defined $value) {
440 $value = lc $value;
441 if ($status{$value}) {
442 $self->{'status'} = $value;
443 }
444 else {
445 $self->throw("$value is not valid status value!");
446 }
447 }
448 if( ! exists $self->{'status'} ) {
449 return "$self";
450 }
451 return $self->{'status'};
452 }
453
454
455 =head2 proof
456
457 Title : proof
458 Usage : $obj->proof()
459 Function:
460
461 Returns the proof of the sequence change object.
462 Valid values are: 'computed' and 'experimental'.
463
464 Example : $obj->proof('computed');
465 Returns : scalar
466 Args : valid string (optional, for setting)
467
468
469 =cut
470
471
472 sub proof {
473 my ($self,$value) = @_;
474 my %proof = (computed => 1,
475 experimental => 1
476 );
477
478 if( defined $value) {
479 $value = lc $value;
480 if ($proof{$value}) {
481 $self->{'proof'} = $value;
482 } else {
483 $self->throw("$value is not valid proof value!");
484 }
485 }
486 return $self->{'proof'};
487 }
488
489
490 =head2 region
491
492 Title : region
493 Usage : $obj->region();
494 Function:
495
496 Sets and returns the name of the sequence region type or
497 protein domain at this location. If value is not set,
498 returns false.
499
500 Example :
501 Returns : string
502 Args : string
503
504 =cut
505
506
507 sub region {
508 my ($self,$value) = @_;
509 if( defined $value) {
510 $self->{'region'} = $value;
511 }
512 return $self->{'region'};
513 }
514
515
516 =head2 region_value
517
518 Title : region_value
519 Usage : $obj->region_value();
520 Function:
521
522 Sets and returns the name of the sequence region_value or
523 protein domain at this location. If value is not set,
524 returns false.
525
526 Example :
527 Returns : string
528 Args : string
529
530 =cut
531
532
533 sub region_value {
534 my ($self,$value) = @_;
535 if( defined $value) {
536 $self->{'region_value'} = $value;
537 }
538 return $self->{'region_value'};
539 }
540
541 =head2 region_dist
542
543 Title : region_dist
544 Usage : $obj->region_dist();
545 Function:
546
547 Sets and returns the distance tot the closest region
548 (i.e. intro/exon or domain) boundary. If distance is not
549 set, returns false.
550
551 Example :
552 Returns : integer
553 Args : integer
554
555 =cut
556
557
558 sub region_dist {
559 my ($self,$value) = @_;
560 if( defined $value) {
561 if ( not $value =~ /^[+-]?\d+$/ ) {
562 $self->throw("[$value] for region_dist has to be an integer\n");
563 } else {
564 $self->{'region_dist'} = $value;
565 }
566 }
567 return $self->{'region_dist'};
568 }
569
570
571 =head2 numbering
572
573 Title : numbering
574 Usage : $obj->numbering()
575 Function:
576
577 Returns the numbering chema used locating sequnce features.
578 Valid values are: 'entry' and 'coding'
579
580 Example : $obj->numbering('coding');
581 Returns : scalar
582 Args : valid string (optional, for setting)
583
584
585 =cut
586
587
588 sub numbering {
589 my ($self,$value) = @_;
590 my %numbering = (entry => 1,
591 coding => 1
592 );
593
594 if( defined $value) {
595 $value = lc $value;
596 if ($numbering{$value}) {
597 $self->{'numbering'} = $value;
598 }
599 else {
600 $self->throw("'$value' is not a valid for numbering!");
601 }
602 }
603 if( ! exists $self->{'numbering'} ) {
604 return "$self";
605 }
606 return $self->{'numbering'};
607 }
608
609 =head2 mut_number
610
611 Title : mut_number
612 Usage : $num = $obj->mut_number;
613 : $num = $obj->mut_number($number);
614 Function:
615
616 Returns or sets the number identifying the order in which the
617 mutation has been issued. Numbers shouldstart from 1.
618 If the number has never been set, the method will return ''
619
620 If you want the output from IO modules look nice and, for
621 multivariant/allele variations, make sense you better set
622 this attribute.
623
624 Returns : an integer
625
626 =cut
627
628
629 sub mut_number {
630 my ($self,$value) = @_;
631 if (defined $value) {
632 $self->{'mut_number'} = $value;
633 }
634 unless (exists $self->{'mut_number'}) {
635 return ('');
636 } else {
637 return $self->{'mut_number'};
638 }
639 }
640
641
642 =head2 SeqDiff
643
644 Title : SeqDiff
645 Usage : $mutobj = $obj->SeqDiff;
646 : $mutobj = $obj->SeqDiff($objref);
647 Function:
648
649 Returns or sets the link-reference to the umbrella
650 Bio::Variation::SeqDiff object. If there is no link,
651 it will return undef
652
653 Note: Adding a variant into a SeqDiff object will
654 automatically set this value.
655
656 Returns : an obj_ref or undef
657
658 See L<Bio::Variation::SeqDiff> for more information.
659
660 =cut
661
662 sub SeqDiff {
663 my ($self,$value) = @_;
664 if (defined $value) {
665 if( ! $value->isa('Bio::Variation::SeqDiff') ) {
666 $self->throw("Is not a Bio::Variation::SeqDiff object but a [$value]");
667 return (undef);
668 }
669 else {
670 $self->{'seqDiff'} = $value;
671 }
672 }
673 unless (exists $self->{'seqDiff'}) {
674 return (undef);
675 } else {
676 return $self->{'seqDiff'};
677 }
678 }
679
680 =head2 add_DBLink
681
682 Title : add_DBLink
683 Usage : $self->add_DBLink($ref)
684 Function: adds a link object
685 Example :
686 Returns :
687 Args :
688
689
690 =cut
691
692
693 sub add_DBLink{
694 my ($self,$com) = @_;
695 if( $com && ! $com->isa('Bio::Annotation::DBLink') ) {
696 $self->throw("Is not a link object but a [$com]");
697 }
698 $com && push(@{$self->{'link'}},$com);
699 }
700
701 =head2 each_DBLink
702
703 Title : each_DBLink
704 Usage : foreach $ref ( $self->each_DBlink() )
705 Function: gets an array of DBlink of objects
706 Example :
707 Returns :
708 Args :
709
710
711 =cut
712
713 sub each_DBLink{
714 my ($self) = @_;
715
716 return @{$self->{'link'}};
717 }
718
719 =head2 restriction_changes
720
721 Title : restriction_changes
722 Usage : $obj->restriction_changes();
723 Function:
724
725 Returns a string containing a list of restriction
726 enzyme changes of form +EcoRI, separated by
727 commas. Strings need to be valid restriction enzyme names
728 as stored in REBASE. allele_ori and allele_mut need to be assigned.
729
730 Example :
731 Returns : string
732 Args : string
733
734 =cut
735
736 sub restriction_changes {
737 my ($self) = @_;
738
739 if (not $self->{'re_changes'}) {
740 my %re = &_enzymes;
741
742 # complain if used on AA data
743 if ($self->isa('Bio::Variation::AAChange')) {
744 $self->throw('Restriction enzymes do not bite polypeptides!');
745 }
746
747 #sanity checks
748 $self->warn('Upstream sequence is empty!')
749 if $self->upStreamSeq eq '';
750 $self->warn('Downstream sequence is empty!')
751 if $self->dnStreamSeq eq '';
752 # $self->warn('Original allele sequence is empty!')
753 # if $self->allele_ori eq '';
754 # $self->warn('Mutated allele sequence is empty!')
755 # if $self->allele_mut eq '';
756
757 #reuse the non empty DNA level list at RNA level if the flanks are identical
758 #Hint: Check DNAMutation object first
759 if ($self->isa('Bio::Variation::RNAChange') and $self->DNAMutation and
760 $self->upStreamSeq eq $self->DNAMutation->upStreamSeq and
761 $self->dnStreamSeq eq $self->DNAMutation->dnStreamSeq and
762 $self->DNAMutation->restriction_changes ne '' ) {
763 $self->{'re_changes'} = $self->DNAMutation->restriction_changes;
764 } else {
765
766 #maximum length of a type II restriction site in the current REBASE
767 my ($le_dn) = 15;
768 my ($le_up) = $le_dn;
769
770 #reduce the flank lengths if the desired length is not available
771 $le_dn = CORE::length ($self->dnStreamSeq) if $le_dn > CORE::length ($self->dnStreamSeq);
772 $le_up = CORE::length ($self->upStreamSeq) if $le_up > CORE::length ($self->upStreamSeq);
773
774 #Build sequence strings to compare
775 my ($oriseq, $mutseq);
776 $oriseq = $mutseq = substr($self->upStreamSeq, -$le_up, $le_up);
777 $oriseq .= $self->allele_ori->seq if $self->allele_ori->seq;
778 $mutseq .= $self->allele_mut->seq if $self->allele_mut->seq;
779 $oriseq .= substr($self->dnStreamSeq, 0, $le_dn);
780 $mutseq .= substr($self->dnStreamSeq, 0, $le_dn);
781
782 # ... and their reverse complements
783 my $oriseq_rev = _revcompl ($oriseq);
784 my $mutseq_rev = _revcompl ($mutseq);
785
786 # collect results into a string
787 my $rec = '';
788 foreach my $enz (sort keys (%re)) {
789 my $site = $re{$enz};
790 my @ori = ($oriseq=~ /$site/g);
791 my @mut = ($mutseq=~ /$site/g);
792 my @ori_r = ($oriseq_rev =~ /$site/g);
793 my @mut_r = ($mutseq_rev =~ /$site/g);
794
795 $rec .= '+'. $enz. ", "
796 if (scalar @ori < scalar @mut) or (scalar @ori_r < scalar @mut_r);
797 $rec .= '-'. $enz. ", "
798 if (scalar @ori > scalar @mut) or (scalar @ori_r > scalar @mut_r);
799
800 }
801 $rec = substr($rec, 0, CORE::length($rec) - 2) if $rec ne '';
802 $self->{'re_changes'} = $rec;
803 }
804 }
805 return $self->{'re_changes'}
806 }
807
808
809 sub _revcompl {
810 # side effect: lower case letters
811 my ($seq) = shift;
812
813 $seq = lc $seq;
814 $seq =~ tr/acgtrymkswhbvdnx/tgcayrkmswdvbhnx/;
815 return CORE::reverse $seq;
816 }
817
818
819 sub _enzymes {
820 #REBASE version 005 type2.005
821 my %enzymes = (
822 'AarI' => 'cacctgc',
823 'AatII' => 'gacgtc',
824 'AccI' => 'gt[ac][gt]ac',
825 'AceIII' => 'cagctc',
826 'AciI' => 'ccgc',
827 'AclI' => 'aacgtt',
828 'AcyI' => 'g[ag]cg[ct]c',
829 'AflII' => 'cttaag',
830 'AflIII' => 'ac[ag][ct]gt',
831 'AgeI' => 'accggt',
832 'AhaIII' => 'tttaaa',
833 'AloI' => 'gaac[acgt][acgt][acgt][acgt][acgt][acgt]tcc',
834 'AluI' => 'agct',
835 'AlwNI' => 'cag[acgt][acgt][acgt]ctg',
836 'ApaBI' => 'gca[acgt][acgt][acgt][acgt][acgt]tgc',
837 'ApaI' => 'gggccc',
838 'ApaLI' => 'gtgcac',
839 'ApoI' => '[ag]aatt[ct]',
840 'AscI' => 'ggcgcgcc',
841 'AsuI' => 'gg[acgt]cc',
842 'AsuII' => 'ttcgaa',
843 'AvaI' => 'c[ct]cg[ag]g',
844 'AvaII' => 'gg[at]cc',
845 'AvaIII' => 'atgcat',
846 'AvrII' => 'cctagg',
847 'BaeI' => 'ac[acgt][acgt][acgt][acgt]gta[ct]c',
848 'BalI' => 'tggcca',
849 'BamHI' => 'ggatcc',
850 'BbvCI' => 'cctcagc',
851 'BbvI' => 'gcagc',
852 'BbvII' => 'gaagac',
853 'BccI' => 'ccatc',
854 'Bce83I' => 'cttgag',
855 'BcefI' => 'acggc',
856 'BcgI' => 'cga[acgt][acgt][acgt][acgt][acgt][acgt]tgc',
857 'BciVI' => 'gtatcc',
858 'BclI' => 'tgatca',
859 'BetI' => '[at]ccgg[at]',
860 'BfiI' => 'actggg',
861 'BglI' => 'gcc[acgt][acgt][acgt][acgt][acgt]ggc',
862 'BglII' => 'agatct',
863 'BinI' => 'ggatc',
864 'BmgI' => 'g[gt]gccc',
865 'BplI' => 'gag[acgt][acgt][acgt][acgt][acgt]ctc',
866 'Bpu10I' => 'cct[acgt]agc',
867 'BsaAI' => '[ct]acgt[ag]',
868 'BsaBI' => 'gat[acgt][acgt][acgt][acgt]atc',
869 'BsaXI' => 'ac[acgt][acgt][acgt][acgt][acgt]ctcc',
870 'BsbI' => 'caacac',
871 'BscGI' => 'cccgt',
872 'BseMII' => 'ctcag',
873 'BsePI' => 'gcgcgc',
874 'BseRI' => 'gaggag',
875 'BseSI' => 'g[gt]gc[ac]c',
876 'BsgI' => 'gtgcag',
877 'BsiI' => 'cacgag',
878 'BsiYI' => 'cc[acgt][acgt][acgt][acgt][acgt][acgt][acgt]gg',
879 'BsmAI' => 'gtctc',
880 'BsmI' => 'gaatgc',
881 'Bsp1407I' => 'tgtaca',
882 'Bsp24I' => 'gac[acgt][acgt][acgt][acgt][acgt][acgt]tgg',
883 'BspGI' => 'ctggac',
884 'BspHI' => 'tcatga',
885 'BspLU11I' => 'acatgt',
886 'BspMI' => 'acctgc',
887 'BspMII' => 'tccgga',
888 'BsrBI' => 'ccgctc',
889 'BsrDI' => 'gcaatg',
890 'BsrI' => 'actgg',
891 'BstEII' => 'ggt[acgt]acc',
892 'BstXI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt]tgg',
893 'BtrI' => 'cacgtc',
894 'BtsI' => 'gcagtg',
895 'Cac8I' => 'gc[acgt][acgt]gc',
896 'CauII' => 'cc[cg]gg',
897 'Cfr10I' => '[ag]ccgg[ct]',
898 'CfrI' => '[ct]ggcc[ag]',
899 'CjeI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt]gt',
900 'CjePI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt][acgt]tc',
901 'ClaI' => 'atcgat',
902 'CviJI' => '[ag]gc[ct]',
903 'CviRI' => 'tgca',
904 'DdeI' => 'ct[acgt]ag',
905 'DpnI' => 'gatc',
906 'DraII' => '[ag]gg[acgt]cc[ct]',
907 'DraIII' => 'cac[acgt][acgt][acgt]gtg',
908 'DrdI' => 'gac[acgt][acgt][acgt][acgt][acgt][acgt]gtc',
909 'DrdII' => 'gaacca',
910 'DsaI' => 'cc[ag][ct]gg',
911 'Eam1105I' => 'gac[acgt][acgt][acgt][acgt][acgt]gtc',
912 'EciI' => 'ggcgga',
913 'Eco31I' => 'ggtctc',
914 'Eco47III' => 'agcgct',
915 'Eco57I' => 'ctgaag',
916 'EcoNI' => 'cct[acgt][acgt][acgt][acgt][acgt]agg',
917 'EcoRI' => 'gaattc',
918 'EcoRII' => 'cc[at]gg',
919 'EcoRV' => 'gatatc',
920 'Esp3I' => 'cgtctc',
921 'EspI' => 'gct[acgt]agc',
922 'FauI' => 'cccgc',
923 'FinI' => 'gggac',
924 'Fnu4HI' => 'gc[acgt]gc',
925 'FnuDII' => 'cgcg',
926 'FokI' => 'ggatg',
927 'FseI' => 'ggccggcc',
928 'GdiII' => 'cggcc[ag]',
929 'GsuI' => 'ctggag',
930 'HaeI' => '[at]ggcc[at]',
931 'HaeII' => '[ag]gcgc[ct]',
932 'HaeIII' => 'ggcc',
933 'HaeIV' => 'ga[ct][acgt][acgt][acgt][acgt][acgt][ag]tc',
934 'HgaI' => 'gacgc',
935 'HgiAI' => 'g[at]gc[at]c',
936 'HgiCI' => 'gg[ct][ag]cc',
937 'HgiEII' => 'acc[acgt][acgt][acgt][acgt][acgt][acgt]ggt',
938 'HgiJII' => 'g[ag]gc[ct]c',
939 'HhaI' => 'gcgc',
940 'Hin4I' => 'ga[cgt][acgt][acgt][acgt][acgt][acgt][acg]tc',
941 'HindII' => 'gt[ct][ag]ac',
942 'HindIII' => 'aagctt',
943 'HinfI' => 'ga[acgt]tc',
944 'HpaI' => 'gttaac',
945 'HpaII' => 'ccgg',
946 'HphI' => 'ggtga',
947 'Hpy178III' => 'tc[acgt][acgt]ga',
948 'Hpy188I' => 'tc[acgt]ga',
949 'Hpy99I' => 'cg[at]cg',
950 'KpnI' => 'ggtacc',
951 'Ksp632I' => 'ctcttc',
952 'MaeI' => 'ctag',
953 'MaeII' => 'acgt',
954 'MaeIII' => 'gt[acgt]ac',
955 'MboI' => 'gatc',
956 'MboII' => 'gaaga',
957 'McrI' => 'cg[ag][ct]cg',
958 'MfeI' => 'caattg',
959 'MjaIV' => 'gt[acgt][acgt]ac',
960 'MluI' => 'acgcgt',
961 'MmeI' => 'tcc[ag]ac',
962 'MnlI' => 'cctc',
963 'MseI' => 'ttaa',
964 'MslI' => 'ca[ct][acgt][acgt][acgt][acgt][ag]tg',
965 'MstI' => 'tgcgca',
966 'MwoI' => 'gc[acgt][acgt][acgt][acgt][acgt][acgt][acgt]gc',
967 'NaeI' => 'gccggc',
968 'NarI' => 'ggcgcc',
969 'NcoI' => 'ccatgg',
970 'NdeI' => 'catatg',
971 'NheI' => 'gctagc',
972 'NlaIII' => 'catg',
973 'NlaIV' => 'gg[acgt][acgt]cc',
974 'NotI' => 'gcggccgc',
975 'NruI' => 'tcgcga',
976 'NspBII' => 'c[ac]gc[gt]g',
977 'NspI' => '[ag]catg[ct]',
978 'PacI' => 'ttaattaa',
979 'Pfl1108I' => 'tcgtag',
980 'PflMI' => 'cca[acgt][acgt][acgt][acgt][acgt]tgg',
981 'PleI' => 'gagtc',
982 'PmaCI' => 'cacgtg',
983 'PmeI' => 'gtttaaac',
984 'PpiI' => 'gaac[acgt][acgt][acgt][acgt][acgt]ctc',
985 'PpuMI' => '[ag]gg[at]cc[ct]',
986 'PshAI' => 'gac[acgt][acgt][acgt][acgt]gtc',
987 'PsiI' => 'ttataa',
988 'PstI' => 'ctgcag',
989 'PvuI' => 'cgatcg',
990 'PvuII' => 'cagctg',
991 'RleAI' => 'cccaca',
992 'RsaI' => 'gtac',
993 'RsrII' => 'cgg[at]ccg',
994 'SacI' => 'gagctc',
995 'SacII' => 'ccgcgg',
996 'SalI' => 'gtcgac',
997 'SanDI' => 'ggg[at]ccc',
998 'SapI' => 'gctcttc',
999 'SauI' => 'cct[acgt]agg',
1000 'ScaI' => 'agtact',
1001 'ScrFI' => 'cc[acgt]gg',
1002 'SduI' => 'g[agt]gc[act]c',
1003 'SecI' => 'cc[acgt][acgt]gg',
1004 'SexAI' => 'acc[at]ggt',
1005 'SfaNI' => 'gcatc',
1006 'SfeI' => 'ct[ag][ct]ag',
1007 'SfiI' => 'ggcc[acgt][acgt][acgt][acgt][acgt]ggcc',
1008 'SgfI' => 'gcgatcgc',
1009 'SgrAI' => 'c[ag]ccgg[ct]g',
1010 'SimI' => 'gggtc',
1011 'SmaI' => 'cccggg',
1012 'SmlI' => 'ct[ct][ag]ag',
1013 'SnaBI' => 'tacgta',
1014 'SnaI' => 'gtatac',
1015 'SpeI' => 'actagt',
1016 'SphI' => 'gcatgc',
1017 'SplI' => 'cgtacg',
1018 'SrfI' => 'gcccgggc',
1019 'Sse232I' => 'cgccggcg',
1020 'Sse8387I' => 'cctgcagg',
1021 'Sse8647I' => 'agg[at]cct',
1022 'SspI' => 'aatatt',
1023 'Sth132I' => 'cccg',
1024 'StuI' => 'aggcct',
1025 'StyI' => 'cc[at][at]gg',
1026 'SwaI' => 'atttaaat',
1027 'TaqI' => 'tcga',
1028 'TaqII' => 'gaccga',
1029 'TatI' => '[at]gtac[at]',
1030 'TauI' => 'gc[cg]gc',
1031 'TfiI' => 'ga[at]tc',
1032 'TseI' => 'gc[at]gc',
1033 'Tsp45I' => 'gt[cg]ac',
1034 'Tsp4CI' => 'ac[acgt]gt',
1035 'TspEI' => 'aatt',
1036 'TspRI' => 'ca[cg]tg[acgt][acgt]',
1037 'Tth111I' => 'gac[acgt][acgt][acgt]gtc',
1038 'Tth111II' => 'caa[ag]ca',
1039 'UbaGI' => 'cac[acgt][acgt][acgt][acgt]gtg',
1040 'UbaPI' => 'cgaacg',
1041 'VspI' => 'attaat',
1042 'XbaI' => 'tctaga',
1043 'XcmI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt][acgt][acgt][acgt]tgg',
1044 'XhoI' => 'ctcgag',
1045 'XhoII' => '[ag]gatc[ct]',
1046 'XmaIII' => 'cggccg',
1047 'XmnI' => 'gaa[acgt][acgt][acgt][acgt]ttc'
1048 );
1049
1050 return %enzymes;
1051 }
1052
1053 1;