0
|
1 #------------------------------------------------------------------
|
|
2 # $Id: RestrictionEnzyme.pm,v 1.25.2.1 2003/06/29 00:53:20 jason Exp $
|
|
3 #
|
|
4 # BioPerl module Bio::Tools::RestrictionEnzyme
|
|
5 #
|
|
6 # Cared for by Steve Chervitz <sac@bioperl.org>
|
|
7 #
|
|
8 # You may distribute this module under the same terms as perl itself
|
|
9 #------------------------------------------------------------------
|
|
10
|
|
11 ## POD Documentation:
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Tools::RestrictionEnzyme - Bioperl object for a restriction endonuclease
|
|
16 (cuts DNA at specific locations)
|
|
17
|
|
18 =head1 SYNOPSIS
|
|
19
|
|
20 =head2 Object Creation
|
|
21
|
|
22 require Bio::Tools::RestrictionEnzyme;
|
|
23
|
|
24 ## Create a new object by name.
|
|
25
|
|
26 $re1 = new Bio::Tools::RestrictionEnzyme(-NAME =>'EcoRI');
|
|
27
|
|
28 ## Create a new object using special syntax
|
|
29 ## which specifies the enzyme name, recognition site, and cut position.
|
|
30 ## Used for enzymes not known to this module.
|
|
31
|
|
32 $re2 = new Bio::Tools::RestrictionEnzyme(-NAME =>'EcoRV--GAT^ATC',
|
|
33 -MAKE =>'custom');
|
|
34
|
|
35 ## Get a list of the resulting fragments when a sequence is cut with
|
|
36 ## the given enzyme. The method expects a Bio::Seq object.
|
|
37
|
|
38 @fragments = $re2->cut_seq($seqobj);
|
|
39
|
|
40 ## Get a list of names of all available restriction enzymes
|
|
41 ## known to this module.
|
|
42
|
|
43 @all = $re->available_list();
|
|
44
|
|
45 ## Get the names of restriction enzymes that have 6 bp
|
|
46 ## recognition sequences.
|
|
47
|
|
48 @sixcutters = $re->available_list(6);
|
|
49
|
|
50
|
|
51 =head1 INSTALLATION
|
|
52
|
|
53 This module is included with the central Bioperl distribution:
|
|
54
|
|
55 http://bio.perl.org/Core/Latest
|
|
56 ftp://bio.perl.org/pub/DIST
|
|
57
|
|
58 Follow the installation instructions included in the README file.
|
|
59
|
|
60 =head1 DESCRIPTION
|
|
61
|
|
62 The Bio::Tools::RestrictionEnzyme.pm module encapsulates generic data and
|
|
63 methods for using restriction endonucleases for in silico restriction
|
|
64 analysis of DNA sequences.
|
|
65
|
|
66 =head2 Considerations
|
|
67
|
|
68 This module is a precursor for a more full featured version that may do such
|
|
69 things as download data from online databases such as REBase http://www.neb.com/rebase/.
|
|
70 Thus, there is currently no functionality for obtaining data about commercial
|
|
71 availability for a restriction enzyme.
|
|
72
|
|
73 At some point in the future, it may make sense to derive RestrictionEnzymes
|
|
74 from a class such as Bio::Enzyme or Bio::Prot::Protein (neither of which now
|
|
75 exist) so that more data about the enzyme and related information can be
|
|
76 easily obtained.
|
|
77
|
|
78 This module is currently in use at
|
|
79
|
|
80 http://genome-www.stanford.edu/Sacch3D/analysis/
|
|
81
|
|
82
|
|
83 =head2 Digesting on Runs of N
|
|
84
|
|
85 To digest a sequence at runs of N's, here's what you can do:
|
|
86
|
|
87 $re_n = new Bio::Tools::RestrictionEnzyme(-name=>'N--NNNNN',
|
|
88 -make=>'custom');
|
|
89
|
|
90 Specify the number of N's you want to match in the -name parameter.
|
|
91 So the above example will recognize and cut at runs of 5 Ns.
|
|
92 If you wanted to cut at runs of 10 N's, you would use
|
|
93
|
|
94 -name => 'N--NNNNNNNNNN'
|
|
95
|
|
96 Note that you must use a specific number of N's, you cannot use a regexp to
|
|
97 digest at N+ for example, because the actual number of N's at each site are
|
|
98 not recorded when the sequence is analyzed. So cut_locations( ) wouldn't be
|
|
99 correct.
|
|
100
|
|
101 =head1 EXAMPLES
|
|
102
|
|
103 See the script examples/restriction.pl in the Bioperl distribution.
|
|
104
|
|
105 =head1 DEPENDENCIES
|
|
106
|
|
107 Bio::Tools::RestrictionEnzyme.pm is a concrete class that inherits from
|
|
108 B<Bio::Root::Root> and uses by delegation B<Bio::PrimarySeq>.
|
|
109
|
|
110 =head1 FEEDBACK
|
|
111
|
|
112 =head2 Mailing Lists
|
|
113
|
|
114 User feedback is an integral part of the evolution of this and other Bioperl
|
|
115 modules. Send your comments and suggestions preferably to one of the Bioperl
|
|
116 mailing lists. Your participation is much appreciated.
|
|
117
|
|
118 bioperl-l@bioperl.org - General discussion
|
|
119 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
120
|
|
121 =head2 Reporting Bugs
|
|
122
|
|
123 Report bugs to the Bioperl bug tracking system to help us keep track of the bugs
|
|
124 and their resolution. Bug reports can be submitted via email or the web:
|
|
125
|
|
126 bioperl-bugs@bio.perl.org
|
|
127 http://bugzilla.bioperl.org/
|
|
128
|
|
129 =head1 AUTHOR
|
|
130
|
|
131 Steve Chervitz, E<lt>sac@bioperl.orgE<gt>
|
|
132
|
|
133 =head1 COPYRIGHT
|
|
134
|
|
135 Copyright (c) 1997-2002 Steve A. Chervitz. All Rights Reserved.
|
|
136 This module is free software; you can redistribute it and/or
|
|
137 modify it under the same terms as Perl itself.
|
|
138
|
|
139 =head1 SEE ALSO
|
|
140
|
|
141 Bio::Root::Root - Base class.
|
|
142 Bio::PrimarySeq - Lightweight sequence object.
|
|
143
|
|
144 http://bio.perl.org/ - Bioperl Project Homepage
|
|
145
|
|
146 =cut
|
|
147
|
|
148 #
|
|
149 ##
|
|
150 ###
|
|
151 #### END of main POD documentation.
|
|
152 ###
|
|
153 ##
|
|
154 #'
|
|
155
|
|
156
|
|
157 =head1 APPENDIX
|
|
158
|
|
159 Methods beginning with a leading underscore are considered private
|
|
160 and are intended for internal use by this module. They are
|
|
161 B<not> considered part of the public interface and are described here
|
|
162 for documentation purposes only.
|
|
163
|
|
164 =cut
|
|
165
|
|
166
|
|
167 package Bio::Tools::RestrictionEnzyme;
|
|
168 use strict;
|
|
169
|
|
170 use Bio::Root::Root;
|
|
171 use Exporter;
|
|
172
|
|
173 use vars qw (@ISA @EXPORT_OK %EXPORT_TAGS $ID $version @RE_available $Revision);
|
|
174
|
|
175 @ISA = qw(Bio::Root::Root Exporter);
|
|
176 @EXPORT_OK = qw(@RE_available);
|
|
177 %EXPORT_TAGS = ( std => [qw(@RE_available)] );
|
|
178
|
|
179 $ID = 'Bio::Tools::RestrictionEnzyme';
|
|
180 $version = 0.04;
|
|
181 $Revision = '$Id: RestrictionEnzyme.pm,v 1.25.2.1 2003/06/29 00:53:20 jason Exp $'; #'
|
|
182
|
|
183 # Generated from REBASE version 208 (strider format), dated Aug 1 2002
|
|
184 # using scripts/contributed/rebase2list.pl
|
|
185 # Syntax: RE-name => 'SITE CUTS-AT' where SITE and CUTS-AT are separated
|
|
186 # by a space.
|
|
187
|
|
188 my %RE = (
|
|
189 'AasI' => 'GACNNNNNNGTC 7',
|
|
190 'AatI' => 'AGGCCT 3',
|
|
191 'AatII' => 'GACGTC 5',
|
|
192 'AauI' => 'TGTACA 1',
|
|
193 'AccI' => 'GTMKAC 2',
|
|
194 'AccII' => 'CGCG 2',
|
|
195 'AccIII' => 'TCCGGA 1',
|
|
196 'Acc16I' => 'TGCGCA 3',
|
|
197 'Acc65I' => 'GGTACC 1',
|
|
198 'Acc113I' => 'AGTACT 3',
|
|
199 'AccB1I' => 'GGYRCC 1',
|
|
200 'AccB7I' => 'CCANNNNNTGG 7',
|
|
201 'AclI' => 'AACGTT 2',
|
|
202 'AcsI' => 'RAATTY 1',
|
|
203 'AcvI' => 'CACGTG 3',
|
|
204 'AcyI' => 'GRCGYC 2',
|
|
205 'AdeI' => 'CACNNNGTG 6',
|
|
206 'AfaI' => 'GTAC 2',
|
|
207 'AfeI' => 'AGCGCT 3',
|
|
208 'AflI' => 'GGWCC 1',
|
|
209 'AflII' => 'CTTAAG 1',
|
|
210 'AflIII' => 'ACRYGT 1',
|
|
211 'AgeI' => 'ACCGGT 1',
|
|
212 'AhaIII' => 'TTTAAA 3',
|
|
213 'AhdI' => 'GACNNNNNGTC 6',
|
|
214 'AhlI' => 'ACTAGT 1',
|
|
215 'AleI' => 'CACNNNNGTG 5',
|
|
216 'AluI' => 'AGCT 2',
|
|
217 'Alw21I' => 'GWGCWC 5',
|
|
218 'Alw44I' => 'GTGCAC 1',
|
|
219 'AlwNI' => 'CAGNNNCTG 6',
|
|
220 'Ama87I' => 'CYCGRG 1',
|
|
221 'AocI' => 'CCTNAGG 2',
|
|
222 'Aor51HI' => 'AGCGCT 3',
|
|
223 'ApaI' => 'GGGCCC 5',
|
|
224 'ApaBI' => 'GCANNNNNTGC 8',
|
|
225 'ApaLI' => 'GTGCAC 1',
|
|
226 'ApoI' => 'RAATTY 1',
|
|
227 'AscI' => 'GGCGCGCC 2',
|
|
228 'AseI' => 'ATTAAT 2',
|
|
229 'AsiAI' => 'ACCGGT 1',
|
|
230 'AsiSI' => 'GCGATCGC 5',
|
|
231 'AsnI' => 'ATTAAT 2',
|
|
232 'AspI' => 'GACNNNGTC 4',
|
|
233 'Asp700I' => 'GAANNNNTTC 5',
|
|
234 'Asp718I' => 'GGTACC 1',
|
|
235 'AspEI' => 'GACNNNNNGTC 6',
|
|
236 'AspHI' => 'GWGCWC 5',
|
|
237 'AspLEI' => 'GCGC 3',
|
|
238 'AspS9I' => 'GGNCC 1',
|
|
239 'AsuI' => 'GGNCC 1',
|
|
240 'AsuII' => 'TTCGAA 2',
|
|
241 'AsuC2I' => 'CCSGG 2',
|
|
242 'AsuNHI' => 'GCTAGC 1',
|
|
243 'AvaI' => 'CYCGRG 1',
|
|
244 'AvaII' => 'GGWCC 1',
|
|
245 'AviII' => 'TGCGCA 3',
|
|
246 'AvrII' => 'CCTAGG 1',
|
|
247 'AxyI' => 'CCTNAGG 2',
|
|
248 'BalI' => 'TGGCCA 3',
|
|
249 'BamHI' => 'GGATCC 1',
|
|
250 'BanI' => 'GGYRCC 1',
|
|
251 'BanII' => 'GRGCYC 5',
|
|
252 'BanIII' => 'ATCGAT 2',
|
|
253 'BbeI' => 'GGCGCC 5',
|
|
254 'BbrPI' => 'CACGTG 3',
|
|
255 'BbuI' => 'GCATGC 5',
|
|
256 'Bbv12I' => 'GWGCWC 5',
|
|
257 'BclI' => 'TGATCA 1',
|
|
258 'BcnI' => 'CCSGG 2',
|
|
259 'BcoI' => 'CYCGRG 1',
|
|
260 'BcuI' => 'ACTAGT 1',
|
|
261 'BetI' => 'WCCGGW 1',
|
|
262 'BfaI' => 'CTAG 1',
|
|
263 'BfmI' => 'CTRYAG 1',
|
|
264 'BfrI' => 'CTTAAG 1',
|
|
265 'BfrBI' => 'ATGCAT 3',
|
|
266 'BfuCI' => 'GATC 0',
|
|
267 'BglI' => 'GCCNNNNNGGC 7',
|
|
268 'BglII' => 'AGATCT 1',
|
|
269 'BlnI' => 'CCTAGG 1',
|
|
270 'BloHII' => 'CTGCAG 5',
|
|
271 'BlpI' => 'GCTNAGC 2',
|
|
272 'Bme18I' => 'GGWCC 1',
|
|
273 'Bme1390I' => 'CCNGG 2',
|
|
274 'Bme1580I' => 'GKGCMC 5',
|
|
275 'BmtI' => 'GCTAGC 5',
|
|
276 'BmyI' => 'GDGCHC 5',
|
|
277 'BoxI' => 'GACNNNNGTC 5',
|
|
278 'Bpu14I' => 'TTCGAA 2',
|
|
279 'Bpu1102I' => 'GCTNAGC 2',
|
|
280 'Bsa29I' => 'ATCGAT 2',
|
|
281 'BsaAI' => 'YACGTR 3',
|
|
282 'BsaBI' => 'GATNNNNATC 5',
|
|
283 'BsaHI' => 'GRCGYC 2',
|
|
284 'BsaJI' => 'CCNNGG 1',
|
|
285 'BsaOI' => 'CGRYCG 4',
|
|
286 'BsaWI' => 'WCCGGW 1',
|
|
287 'BscI' => 'ATCGAT 2',
|
|
288 'Bsc4I' => 'CCNNNNNNNGG 7',
|
|
289 'BscBI' => 'GGNNCC 3',
|
|
290 'BscFI' => 'GATC 0',
|
|
291 'Bse8I' => 'GATNNNNATC 5',
|
|
292 'Bse21I' => 'CCTNAGG 2',
|
|
293 'Bse118I' => 'RCCGGY 1',
|
|
294 'BseAI' => 'TCCGGA 1',
|
|
295 'BseBI' => 'CCWGG 2',
|
|
296 'BseCI' => 'ATCGAT 2',
|
|
297 'BseDI' => 'CCNNGG 1',
|
|
298 'BseJI' => 'GATNNNNATC 5',
|
|
299 'BseLI' => 'CCNNNNNNNGG 7',
|
|
300 'BsePI' => 'GCGCGC 1',
|
|
301 'BseSI' => 'GKGCMC 5',
|
|
302 'BseX3I' => 'CGGCCG 1',
|
|
303 'BshI' => 'GGCC 2',
|
|
304 'Bsh1236I' => 'CGCG 2',
|
|
305 'Bsh1285I' => 'CGRYCG 4',
|
|
306 'BshFI' => 'GGCC 2',
|
|
307 'BshNI' => 'GGYRCC 1',
|
|
308 'BshTI' => 'ACCGGT 1',
|
|
309 'BsiBI' => 'GATNNNNATC 5',
|
|
310 'BsiCI' => 'TTCGAA 2',
|
|
311 'BsiEI' => 'CGRYCG 4',
|
|
312 'BsiHKAI' => 'GWGCWC 5',
|
|
313 'BsiHKCI' => 'CYCGRG 1',
|
|
314 'BsiLI' => 'CCWGG 2',
|
|
315 'BsiMI' => 'TCCGGA 1',
|
|
316 'BsiQI' => 'TGATCA 1',
|
|
317 'BsiSI' => 'CCGG 1',
|
|
318 'BsiWI' => 'CGTACG 1',
|
|
319 'BsiXI' => 'ATCGAT 2',
|
|
320 'BsiYI' => 'CCNNNNNNNGG 7',
|
|
321 'BsiZI' => 'GGNCC 1',
|
|
322 'BslI' => 'CCNNNNNNNGG 7',
|
|
323 'BsoBI' => 'CYCGRG 1',
|
|
324 'Bsp13I' => 'TCCGGA 1',
|
|
325 'Bsp19I' => 'CCATGG 1',
|
|
326 'Bsp68I' => 'TCGCGA 3',
|
|
327 'Bsp106I' => 'ATCGAT 2',
|
|
328 'Bsp119I' => 'TTCGAA 2',
|
|
329 'Bsp120I' => 'GGGCCC 1',
|
|
330 'Bsp143I' => 'GATC 0',
|
|
331 'Bsp143II' => 'RGCGCY 5',
|
|
332 'Bsp1286I' => 'GDGCHC 5',
|
|
333 'Bsp1407I' => 'TGTACA 1',
|
|
334 'Bsp1720I' => 'GCTNAGC 2',
|
|
335 'BspA2I' => 'CCTAGG 1',
|
|
336 'BspCI' => 'CGATCG 4',
|
|
337 'BspDI' => 'ATCGAT 2',
|
|
338 'BspEI' => 'TCCGGA 1',
|
|
339 'BspHI' => 'TCATGA 1',
|
|
340 'BspLI' => 'GGNNCC 3',
|
|
341 'BspLU11I' => 'ACATGT 1',
|
|
342 'BspMII' => 'TCCGGA 1',
|
|
343 'BspTI' => 'CTTAAG 1',
|
|
344 'BspT104I' => 'TTCGAA 2',
|
|
345 'BspT107I' => 'GGYRCC 1',
|
|
346 'BspXI' => 'ATCGAT 2',
|
|
347 'BsrBRI' => 'GATNNNNATC 5',
|
|
348 'BsrFI' => 'RCCGGY 1',
|
|
349 'BsrGI' => 'TGTACA 1',
|
|
350 'BssAI' => 'RCCGGY 1',
|
|
351 'BssECI' => 'CCNNGG 1',
|
|
352 'BssHI' => 'CTCGAG 1',
|
|
353 'BssHII' => 'GCGCGC 1',
|
|
354 'BssKI' => 'CCNGG 0',
|
|
355 'BssNAI' => 'GTATAC 3',
|
|
356 'BssT1I' => 'CCWWGG 1',
|
|
357 'Bst98I' => 'CTTAAG 1',
|
|
358 'Bst1107I' => 'GTATAC 3',
|
|
359 'BstACI' => 'GRCGYC 2',
|
|
360 'BstAPI' => 'GCANNNNNTGC 7',
|
|
361 'BstBI' => 'TTCGAA 2',
|
|
362 'BstBAI' => 'YACGTR 3',
|
|
363 'Bst4CI' => 'ACNGT 3',
|
|
364 'BstC8I' => 'GCNNGC 3',
|
|
365 'BstDEI' => 'CTNAG 1',
|
|
366 'BstDSI' => 'CCRYGG 1',
|
|
367 'BstEII' => 'GGTNACC 1',
|
|
368 'BstENI' => 'CCTNNNNNAGG 5',
|
|
369 'BstENII' => 'GATC 0',
|
|
370 'BstFNI' => 'CGCG 2',
|
|
371 'BstH2I' => 'RGCGCY 5',
|
|
372 'BstHHI' => 'GCGC 3',
|
|
373 'BstHPI' => 'GTTAAC 3',
|
|
374 'BstKTI' => 'GATC 3',
|
|
375 'BstMAI' => 'CTGCAG 5',
|
|
376 'BstMCI' => 'CGRYCG 4',
|
|
377 'BstMWI' => 'GCNNNNNNNGC 7',
|
|
378 'BstNI' => 'CCWGG 2',
|
|
379 'BstNSI' => 'RCATGY 5',
|
|
380 'BstOI' => 'CCWGG 2',
|
|
381 'BstPI' => 'GGTNACC 1',
|
|
382 'BstPAI' => 'GACNNNNGTC 5',
|
|
383 'BstSCI' => 'CCNGG 0',
|
|
384 'BstSFI' => 'CTRYAG 1',
|
|
385 'BstSNI' => 'TACGTA 3',
|
|
386 'BstUI' => 'CGCG 2',
|
|
387 'Bst2UI' => 'CCWGG 2',
|
|
388 'BstXI' => 'CCANNNNNNTGG 8',
|
|
389 'BstX2I' => 'RGATCY 1',
|
|
390 'BstYI' => 'RGATCY 1',
|
|
391 'BstZI' => 'CGGCCG 1',
|
|
392 'BstZ17I' => 'GTATAC 3',
|
|
393 'Bsu15I' => 'ATCGAT 2',
|
|
394 'Bsu36I' => 'CCTNAGG 2',
|
|
395 'BsuRI' => 'GGCC 2',
|
|
396 'BsuTUI' => 'ATCGAT 2',
|
|
397 'BtgI' => 'CCRYGG 1',
|
|
398 'BthCI' => 'GCNGC 4',
|
|
399 'Cac8I' => 'GCNNGC 3',
|
|
400 'CaiI' => 'CAGNNNCTG 6',
|
|
401 'CauII' => 'CCSGG 2',
|
|
402 'CciNI' => 'GCGGCCGC 2',
|
|
403 'CelII' => 'GCTNAGC 2',
|
|
404 'CfoI' => 'GCGC 3',
|
|
405 'CfrI' => 'YGGCCR 1',
|
|
406 'Cfr9I' => 'CCCGGG 1',
|
|
407 'Cfr10I' => 'RCCGGY 1',
|
|
408 'Cfr13I' => 'GGNCC 1',
|
|
409 'Cfr42I' => 'CCGCGG 4',
|
|
410 'ChaI' => 'GATC 4',
|
|
411 'ClaI' => 'ATCGAT 2',
|
|
412 'CpoI' => 'CGGWCCG 2',
|
|
413 'CspI' => 'CGGWCCG 2',
|
|
414 'Csp6I' => 'GTAC 1',
|
|
415 'Csp45I' => 'TTCGAA 2',
|
|
416 'CspAI' => 'ACCGGT 1',
|
|
417 'CviAII' => 'CATG 1',
|
|
418 'CviJI' => 'RGCY 2',
|
|
419 'CviRI' => 'TGCA 2',
|
|
420 'CviTI' => 'RGCY 2',
|
|
421 'CvnI' => 'CCTNAGG 2',
|
|
422 'DdeI' => 'CTNAG 1',
|
|
423 'DpnI' => 'GATC 2',
|
|
424 'DpnII' => 'GATC 0',
|
|
425 'DraI' => 'TTTAAA 3',
|
|
426 'DraII' => 'RGGNCCY 2',
|
|
427 'DraIII' => 'CACNNNGTG 6',
|
|
428 'DrdI' => 'GACNNNNNNGTC 7',
|
|
429 'DsaI' => 'CCRYGG 1',
|
|
430 'DseDI' => 'GACNNNNNNGTC 7',
|
|
431 'EaeI' => 'YGGCCR 1',
|
|
432 'EagI' => 'CGGCCG 1',
|
|
433 'Eam1105I' => 'GACNNNNNGTC 6',
|
|
434 'Ecl136II' => 'GAGCTC 3',
|
|
435 'EclHKI' => 'GACNNNNNGTC 6',
|
|
436 'EclXI' => 'CGGCCG 1',
|
|
437 'Eco24I' => 'GRGCYC 5',
|
|
438 'Eco32I' => 'GATATC 3',
|
|
439 'Eco47I' => 'GGWCC 1',
|
|
440 'Eco47III' => 'AGCGCT 3',
|
|
441 'Eco52I' => 'CGGCCG 1',
|
|
442 'Eco72I' => 'CACGTG 3',
|
|
443 'Eco81I' => 'CCTNAGG 2',
|
|
444 'Eco88I' => 'CYCGRG 1',
|
|
445 'Eco91I' => 'GGTNACC 1',
|
|
446 'Eco105I' => 'TACGTA 3',
|
|
447 'Eco130I' => 'CCWWGG 1',
|
|
448 'Eco147I' => 'AGGCCT 3',
|
|
449 'EcoHI' => 'CCSGG 0',
|
|
450 'EcoICRI' => 'GAGCTC 3',
|
|
451 'EcoNI' => 'CCTNNNNNAGG 5',
|
|
452 'EcoO65I' => 'GGTNACC 1',
|
|
453 'EcoO109I' => 'RGGNCCY 2',
|
|
454 'EcoRI' => 'GAATTC 1',
|
|
455 'EcoRII' => 'CCWGG 0',
|
|
456 'EcoRV' => 'GATATC 3',
|
|
457 'EcoT14I' => 'CCWWGG 1',
|
|
458 'EcoT22I' => 'ATGCAT 5',
|
|
459 'EcoT38I' => 'GRGCYC 5',
|
|
460 'EgeI' => 'GGCGCC 3',
|
|
461 'EheI' => 'GGCGCC 3',
|
|
462 'ErhI' => 'CCWWGG 1',
|
|
463 'EsaBC3I' => 'TCGA 2',
|
|
464 'EspI' => 'GCTNAGC 2',
|
|
465 'FatI' => 'CATG 0',
|
|
466 'FauNDI' => 'CATATG 2',
|
|
467 'FbaI' => 'TGATCA 1',
|
|
468 'FblI' => 'GTMKAC 2',
|
|
469 'FmuI' => 'GGNCC 4',
|
|
470 'FnuDII' => 'CGCG 2',
|
|
471 'Fnu4HI' => 'GCNGC 2',
|
|
472 'FriOI' => 'GRGCYC 5',
|
|
473 'FseI' => 'GGCCGGCC 6',
|
|
474 'FspI' => 'TGCGCA 3',
|
|
475 'FspAI' => 'RTGCGCAY 4',
|
|
476 'Fsp4HI' => 'GCNGC 2',
|
|
477 'FunI' => 'AGCGCT 3',
|
|
478 'FunII' => 'GAATTC 1',
|
|
479 'HaeI' => 'WGGCCW 3',
|
|
480 'HaeII' => 'RGCGCY 5',
|
|
481 'HaeIII' => 'GGCC 2',
|
|
482 'HapII' => 'CCGG 1',
|
|
483 'HgiAI' => 'GWGCWC 5',
|
|
484 'HgiCI' => 'GGYRCC 1',
|
|
485 'HgiJII' => 'GRGCYC 5',
|
|
486 'HhaI' => 'GCGC 3',
|
|
487 'Hin1I' => 'GRCGYC 2',
|
|
488 'Hin6I' => 'GCGC 1',
|
|
489 'HinP1I' => 'GCGC 1',
|
|
490 'HincII' => 'GTYRAC 3',
|
|
491 'HindII' => 'GTYRAC 3',
|
|
492 'HindIII' => 'AAGCTT 1',
|
|
493 'HinfI' => 'GANTC 1',
|
|
494 'HpaI' => 'GTTAAC 3',
|
|
495 'HpaII' => 'CCGG 1',
|
|
496 'Hpy8I' => 'GTNNAC 3',
|
|
497 'Hpy99I' => 'CGWCG 5',
|
|
498 'Hpy178III' => 'TCNNGA 2',
|
|
499 'Hpy188I' => 'TCNGA 3',
|
|
500 'Hpy188III' => 'TCNNGA 2',
|
|
501 'HpyCH4I' => 'CATG 3',
|
|
502 'HpyCH4III' => 'ACNGT 3',
|
|
503 'HpyCH4IV' => 'ACGT 1',
|
|
504 'HpyCH4V' => 'TGCA 2',
|
|
505 'HpyF10VI' => 'GCNNNNNNNGC 8',
|
|
506 'Hsp92I' => 'GRCGYC 2',
|
|
507 'Hsp92II' => 'CATG 4',
|
|
508 'HspAI' => 'GCGC 1',
|
|
509 'ItaI' => 'GCNGC 2',
|
|
510 'KasI' => 'GGCGCC 1',
|
|
511 'KpnI' => 'GGTACC 5',
|
|
512 'Kpn2I' => 'TCCGGA 1',
|
|
513 'KspI' => 'CCGCGG 4',
|
|
514 'Ksp22I' => 'TGATCA 1',
|
|
515 'KspAI' => 'GTTAAC 3',
|
|
516 'Kzo9I' => 'GATC 0',
|
|
517 'LpnI' => 'RGCGCY 3',
|
|
518 'LspI' => 'TTCGAA 2',
|
|
519 'MabI' => 'ACCWGGT 1',
|
|
520 'MaeI' => 'CTAG 1',
|
|
521 'MaeII' => 'ACGT 1',
|
|
522 'MaeIII' => 'GTNAC 0',
|
|
523 'MamI' => 'GATNNNNATC 5',
|
|
524 'MboI' => 'GATC 0',
|
|
525 'McrI' => 'CGRYCG 4',
|
|
526 'MfeI' => 'CAATTG 1',
|
|
527 'MflI' => 'RGATCY 1',
|
|
528 'MhlI' => 'GDGCHC 5',
|
|
529 'MlsI' => 'TGGCCA 3',
|
|
530 'MluI' => 'ACGCGT 1',
|
|
531 'MluNI' => 'TGGCCA 3',
|
|
532 'Mly113I' => 'GGCGCC 2',
|
|
533 'Mph1103I' => 'ATGCAT 5',
|
|
534 'MroI' => 'TCCGGA 1',
|
|
535 'MroNI' => 'GCCGGC 1',
|
|
536 'MroXI' => 'GAANNNNTTC 5',
|
|
537 'MscI' => 'TGGCCA 3',
|
|
538 'MseI' => 'TTAA 1',
|
|
539 'MslI' => 'CAYNNNNRTG 5',
|
|
540 'MspI' => 'CCGG 1',
|
|
541 'Msp20I' => 'TGGCCA 3',
|
|
542 'MspA1I' => 'CMGCKG 3',
|
|
543 'MspCI' => 'CTTAAG 1',
|
|
544 'MspR9I' => 'CCNGG 2',
|
|
545 'MssI' => 'GTTTAAAC 4',
|
|
546 'MstI' => 'TGCGCA 3',
|
|
547 'MunI' => 'CAATTG 1',
|
|
548 'MvaI' => 'CCWGG 2',
|
|
549 'MvnI' => 'CGCG 2',
|
|
550 'MwoI' => 'GCNNNNNNNGC 7',
|
|
551 'NaeI' => 'GCCGGC 3',
|
|
552 'NarI' => 'GGCGCC 2',
|
|
553 'NciI' => 'CCSGG 2',
|
|
554 'NcoI' => 'CCATGG 1',
|
|
555 'NdeI' => 'CATATG 2',
|
|
556 'NdeII' => 'GATC 0',
|
|
557 'NgoAIV' => 'GCCGGC 1',
|
|
558 'NgoMIV' => 'GCCGGC 1',
|
|
559 'NheI' => 'GCTAGC 1',
|
|
560 'NlaIII' => 'CATG 4',
|
|
561 'NlaIV' => 'GGNNCC 3',
|
|
562 'Nli3877I' => 'CYCGRG 5',
|
|
563 'NmuCI' => 'GTSAC 0',
|
|
564 'NotI' => 'GCGGCCGC 2',
|
|
565 'NruI' => 'TCGCGA 3',
|
|
566 'NruGI' => 'GACNNNNNGTC 6',
|
|
567 'NsbI' => 'TGCGCA 3',
|
|
568 'NsiI' => 'ATGCAT 5',
|
|
569 'NspI' => 'RCATGY 5',
|
|
570 'NspIII' => 'CYCGRG 1',
|
|
571 'NspV' => 'TTCGAA 2',
|
|
572 'NspBII' => 'CMGCKG 3',
|
|
573 'OliI' => 'CACNNNNGTG 5',
|
|
574 'PacI' => 'TTAATTAA 5',
|
|
575 'PaeI' => 'GCATGC 5',
|
|
576 'PaeR7I' => 'CTCGAG 1',
|
|
577 'PagI' => 'TCATGA 1',
|
|
578 'PalI' => 'GGCC 2',
|
|
579 'PauI' => 'GCGCGC 1',
|
|
580 'PceI' => 'AGGCCT 3',
|
|
581 'PciI' => 'ACATGT 1',
|
|
582 'PdiI' => 'GCCGGC 3',
|
|
583 'PdmI' => 'GAANNNNTTC 5',
|
|
584 'Pfl23II' => 'CGTACG 1',
|
|
585 'PflBI' => 'CCANNNNNTGG 7',
|
|
586 'PflFI' => 'GACNNNGTC 4',
|
|
587 'PflMI' => 'CCANNNNNTGG 7',
|
|
588 'PfoI' => 'TCCNGGA 1',
|
|
589 'PinAI' => 'ACCGGT 1',
|
|
590 'Ple19I' => 'CGATCG 4',
|
|
591 'PmaCI' => 'CACGTG 3',
|
|
592 'PmeI' => 'GTTTAAAC 4',
|
|
593 'PmlI' => 'CACGTG 3',
|
|
594 'Ppu10I' => 'ATGCAT 1',
|
|
595 'PpuMI' => 'RGGWCCY 2',
|
|
596 'PpuXI' => 'RGGWCCY 2',
|
|
597 'PshAI' => 'GACNNNNGTC 5',
|
|
598 'PshBI' => 'ATTAAT 2',
|
|
599 'PsiI' => 'TTATAA 3',
|
|
600 'Psp03I' => 'GGWCC 4',
|
|
601 'Psp5II' => 'RGGWCCY 2',
|
|
602 'Psp6I' => 'CCWGG 0',
|
|
603 'Psp1406I' => 'AACGTT 2',
|
|
604 'PspAI' => 'CCCGGG 1',
|
|
605 'Psp124BI' => 'GAGCTC 5',
|
|
606 'PspEI' => 'GGTNACC 1',
|
|
607 'PspGI' => 'CCWGG 0',
|
|
608 'PspLI' => 'CGTACG 1',
|
|
609 'PspN4I' => 'GGNNCC 3',
|
|
610 'PspOMI' => 'GGGCCC 1',
|
|
611 'PspPI' => 'GGNCC 1',
|
|
612 'PspPPI' => 'RGGWCCY 2',
|
|
613 'PssI' => 'RGGNCCY 5',
|
|
614 'PstI' => 'CTGCAG 5',
|
|
615 'PsuI' => 'RGATCY 1',
|
|
616 'PsyI' => 'GACNNNGTC 4',
|
|
617 'PvuI' => 'CGATCG 4',
|
|
618 'PvuII' => 'CAGCTG 3',
|
|
619 'RcaI' => 'TCATGA 1',
|
|
620 'RsaI' => 'GTAC 2',
|
|
621 'RsrII' => 'CGGWCCG 2',
|
|
622 'Rsr2I' => 'CGGWCCG 2',
|
|
623 'SacI' => 'GAGCTC 5',
|
|
624 'SacII' => 'CCGCGG 4',
|
|
625 'SalI' => 'GTCGAC 1',
|
|
626 'SanDI' => 'GGGWCCC 2',
|
|
627 'SatI' => 'GCNGC 2',
|
|
628 'SauI' => 'CCTNAGG 2',
|
|
629 'Sau96I' => 'GGNCC 1',
|
|
630 'Sau3AI' => 'GATC 0',
|
|
631 'SbfI' => 'CCTGCAGG 6',
|
|
632 'ScaI' => 'AGTACT 3',
|
|
633 'SciI' => 'CTCGAG 3',
|
|
634 'ScrFI' => 'CCNGG 2',
|
|
635 'SdaI' => 'CCTGCAGG 6',
|
|
636 'SduI' => 'GDGCHC 5',
|
|
637 'SecI' => 'CCNNGG 1',
|
|
638 'SelI' => 'CGCG 0',
|
|
639 'SexAI' => 'ACCWGGT 1',
|
|
640 'SfcI' => 'CTRYAG 1',
|
|
641 'SfeI' => 'CTRYAG 1',
|
|
642 'SfiI' => 'GGCCNNNNNGGCC 8',
|
|
643 'SfoI' => 'GGCGCC 3',
|
|
644 'Sfr274I' => 'CTCGAG 1',
|
|
645 'Sfr303I' => 'CCGCGG 4',
|
|
646 'SfuI' => 'TTCGAA 2',
|
|
647 'SgfI' => 'GCGATCGC 5',
|
|
648 'SgrAI' => 'CRCCGGYG 2',
|
|
649 'SgrBI' => 'CCGCGG 4',
|
|
650 'SinI' => 'GGWCC 1',
|
|
651 'SlaI' => 'CTCGAG 1',
|
|
652 'SmaI' => 'CCCGGG 3',
|
|
653 'SmiI' => 'ATTTAAAT 4',
|
|
654 'SmiMI' => 'CAYNNNNRTG 5',
|
|
655 'SmlI' => 'CTYRAG 1',
|
|
656 'SnaBI' => 'TACGTA 3',
|
|
657 'SpaHI' => 'GCATGC 5',
|
|
658 'SpeI' => 'ACTAGT 1',
|
|
659 'SphI' => 'GCATGC 5',
|
|
660 'SplI' => 'CGTACG 1',
|
|
661 'SrfI' => 'GCCCGGGC 4',
|
|
662 'Sse9I' => 'AATT 0',
|
|
663 'Sse232I' => 'CGCCGGCG 2',
|
|
664 'Sse8387I' => 'CCTGCAGG 6',
|
|
665 'Sse8647I' => 'AGGWCCT 2',
|
|
666 'SseBI' => 'AGGCCT 3',
|
|
667 'SspI' => 'AATATT 3',
|
|
668 'SspBI' => 'TGTACA 1',
|
|
669 'SstI' => 'GAGCTC 5',
|
|
670 'SstII' => 'CCGCGG 4',
|
|
671 'StuI' => 'AGGCCT 3',
|
|
672 'StyI' => 'CCWWGG 1',
|
|
673 'SunI' => 'CGTACG 1',
|
|
674 'SwaI' => 'ATTTAAAT 4',
|
|
675 'TaaI' => 'ACNGT 3',
|
|
676 'TaiI' => 'ACGT 4',
|
|
677 'TaqI' => 'TCGA 1',
|
|
678 'TasI' => 'AATT 0',
|
|
679 'TatI' => 'WGTACW 1',
|
|
680 'TauI' => 'GCSGC 4',
|
|
681 'TelI' => 'GACNNNGTC 4',
|
|
682 'TfiI' => 'GAWTC 1',
|
|
683 'ThaI' => 'CGCG 2',
|
|
684 'TliI' => 'CTCGAG 1',
|
|
685 'Tru1I' => 'TTAA 1',
|
|
686 'Tru9I' => 'TTAA 1',
|
|
687 'TscI' => 'ACGT 4',
|
|
688 'TseI' => 'GCWGC 1',
|
|
689 'Tsp45I' => 'GTSAC 0',
|
|
690 'Tsp509I' => 'AATT 0',
|
|
691 'Tsp4CI' => 'ACNGT 3',
|
|
692 'TspEI' => 'AATT 0',
|
|
693 'Tth111I' => 'GACNNNGTC 4',
|
|
694 'TthHB8I' => 'TCGA 1',
|
|
695 'UnbI' => 'GGNCC 0',
|
|
696 'Van91I' => 'CCANNNNNTGG 7',
|
|
697 'Vha464I' => 'CTTAAG 1',
|
|
698 'VneI' => 'GTGCAC 1',
|
|
699 'VpaK11AI' => 'GGWCC 0',
|
|
700 'VpaK11BI' => 'GGWCC 1',
|
|
701 'VspI' => 'ATTAAT 2',
|
|
702 'XagI' => 'CCTNNNNNAGG 5',
|
|
703 'XapI' => 'RAATTY 1',
|
|
704 'XbaI' => 'TCTAGA 1',
|
|
705 'XceI' => 'RCATGY 5',
|
|
706 'XcmI' => 'CCANNNNNNNNNTGG 8',
|
|
707 'XhoI' => 'CTCGAG 1',
|
|
708 'XhoII' => 'RGATCY 1',
|
|
709 'XmaI' => 'CCCGGG 1',
|
|
710 'XmaIII' => 'CGGCCG 1',
|
|
711 'XmaCI' => 'CCCGGG 1',
|
|
712 'XmaJI' => 'CCTAGG 1',
|
|
713 'XmiI' => 'GTMKAC 2',
|
|
714 'XmnI' => 'GAANNNNTTC 5',
|
|
715 'XspI' => 'CTAG 1',
|
|
716 'ZhoI' => 'ATCGAT 2',
|
|
717 'ZraI' => 'GACGTC 3',
|
|
718 'Zsp2I' => 'ATGCAT 5',
|
|
719 );
|
|
720
|
|
721 @RE_available = sort keys %RE;
|
|
722
|
|
723
|
|
724 =head1 new
|
|
725
|
|
726 Title : new
|
|
727 Purpose : Initializes the RestrictionEnzyme object and calls
|
|
728 : the superclass constructor first (Bio::Root::Root).
|
|
729 Returns : The newly constructed RestrictionEnzyme object
|
|
730 Argument : Parameters passed to new()
|
|
731 Comments : A RestrictionEnzyme object manages its recognition sequence
|
|
732 : as a Bio::PrimarySeq object.
|
|
733
|
|
734 See Also : L<_make_custom>(), L<_make_standard>(), B<Bio::PrimarySeq.pm::_initialize()>
|
|
735
|
|
736 =cut
|
|
737
|
|
738 #---------------
|
|
sub new {
    # Constructor.
    #
    # Named parameters (extracted with _rearrange):
    #   -NAME : enzyme name such as 'EcoRI'; when -MAKE is 'custom' this is
    #           a specification string of the form 'Name--SITE', where SITE
    #           may contain a '^' marking the cut position.
    #   -MAKE : 'custom' builds the enzyme from the -NAME string via
    #           _make_custom(); any other value (or none) looks the name
    #           up via _make_standard().
    #
    # Returns the new object. The recognition sequence is kept in
    # $self->{'_seq'} as a Bio::PrimarySeq with a 'dna' alphabet.
    # Exceptions raised by the two _make_* helpers propagate to the caller.
    my ($class, @args) = @_;

    my $self = $class->SUPER::new(@args);
    my ($name, $make) = $self->_rearrange([qw(NAME MAKE)], @args);
    $self->name($name) if $name;

    # Either helper returns the named parameters for the Bio::PrimarySeq
    # constructor and records the cut position in $self->{'_cuts_after'}.
    my %data = (defined $make && $make eq 'custom')
             ? $self->_make_custom($name)
             : $self->_make_standard($name);

    # Direct class-method call instead of the indirect-object form
    # ("new Bio::PrimarySeq(...)"), whose parsing depends on what happens
    # to be in scope.
    $self->{'_seq'} = Bio::PrimarySeq->new(
        %data,
        -VERBOSE  => $self->verbose,
        -alphabet => 'dna',
    );

    return $self;
}
|
|
758
|
|
759
|
|
760 #=head1 _make_standard
|
|
761 #
|
|
762 # Title : _make_standard
|
|
763 # Usage : n/a; automatically called by new()
|
|
764 # Purpose : Permits standard RE object construction from an enzyme name such as
|
|
765 # : 'EcoRI'.
|
|
766 # Returns : Hash containing named parameters for Bio::PrimarySeq.pm constructor.
|
|
767 # Argument : String containing the name of an enzyme known to this module.
|
|
768 # Throws : Exception if the requested enzyme name is unavailable.
|
|
769 # : NOTE: Case sensitive.
|
|
770 #
|
|
771 #See Also : L<Bio::PrimarySeq::_initialize()|Bio::PrimarySeq>, L<_make_custom()|_make_custom>
|
|
772 #
|
|
773 #=cut
|
|
774
|
|
775 #------------------
|
|
sub _make_standard {
    # Builds the Bio::PrimarySeq constructor parameters for an enzyme that
    # is looked up by name in the %RE table.
    #
    #   $name : enzyme name (case sensitive); leading and trailing
    #           whitespace is stripped before the lookup.
    #
    # Returns a hash with the keys -SEQ (recognition sequence), -NAME and
    # -ID (both the enzyme name).
    # Side effect: stores the cut position from the table entry in
    # $self->{'_cuts_after'}.
    # Throws if is_available() rejects the name (including a missing name).
    my ($self, $name) = @_;

    # Only touch the name when there is one; a missing name is reported by
    # the exception below rather than by uninitialized-value warnings.
    $name =~ s/^\s+|\s+$//g if defined $name;

    unless ($self->is_available($name)) {
        my $shown = defined $name ? $name : '';
        $self->throw("Unavailable or undefined enzyme: $shown (Note: CASE SENSITIVE)\n" .
                     "Currently available enzymes: \n@RE_available\n");
    }

    # Table entries have the form 'SITE CUTS-AT', separated by whitespace.
    my ($site, $cuts_after) = split ' ', $RE{$name};

    my %dat;
    $dat{-SEQ}  = $site;
    $dat{-NAME} = $dat{-ID} = $name;
    $self->{'_cuts_after'} = $cuts_after;

    return %dat;
}
|
|
794
|
|
795
|
|
796 #=head1 _make_custom
|
|
797 #
|
|
798 # Title : _make_custom
|
|
799 # Usage : n/a; automatically called by new()
|
|
800 # Purpose : Permits custom RE object construction from strings
|
|
801 # : such as 'EcoRI--G^AATTC' as the name of the enzyme.
|
|
802 # Returns : Hash containing named parameters for Bio::PrimarySeq.pm constructor.
|
|
803 # Argument : String containing string with special syntax.
|
|
804 # Throws : Exception if the string has bad syntax.
|
|
805 # : Warning if the string did not specify cut position.
|
|
806 # : Places cut site after 5'-most position.
|
|
807 #
|
|
808 #See Also : L<Bio::PrimarySeq::_initialize()|Bio::PrimarySeq>
|
|
809 #
|
|
810 #=cut
|
|
811
|
|
812 #'
|
|
813 #-----------------
|
|
sub _make_custom {
    # Builds the Bio::PrimarySeq constructor parameters from a custom
    # enzyme specification of the form 'Name--SITE', where SITE may contain
    # a '^' marking the cut position (e.g. 'EcoRV--GAT^ATC').
    #
    #   $name : the specification string; all whitespace in it is removed.
    #
    # Returns a hash with the keys -SEQ (the site with every '^' removed),
    # -NAME and -ID (both the part before '--').
    # Side effects: resets the object's name to the part before '--' and
    # stores the index of the '^' in $self->{'_cuts_after'}.
    # Throws if nothing usable follows the '--'.
    # Warns and assumes cut position 0 if the site contains no '^'.
    my ($self, $name) = @_;

    # Treat a missing specification as empty so it is reported by the
    # exception below instead of by uninitialized-value warnings.
    $name = '' unless defined $name;
    $name =~ s/\s+//g;

    my ($enzyme, $site) = split '--', $name;

    my %dat;
    $dat{-NAME} = $dat{-ID} = $enzyme;
    $self->name($enzyme);    ## Reset name

    unless ($site) {
        my $shown = defined $enzyme ? $enzyme : '';
        # One message string: a second positional argument to throw() is
        # not appended to the text, so the usage hint must be part of it.
        return $self->throw("Undefined recognition site for $shown.\n" .
                            "Use this syntax: EcoRV--GAT^ATC");
    }

    ## Determine the cuts_after point.
    my $cut_index = index $site, '^';
    if ($cut_index < 0) {
        $cut_index = 0;
        $self->warn("Unknown cut position for $enzyme. Assuming position 0\n" .
                    "Use carat to specify cut position (e.g., G^AATTC)");
    }
    $self->{'_cuts_after'} = $cut_index;

    ## Save the recognition sequence after removing the '^'
    $site =~ s/\^//g;
    $dat{-SEQ} = $site;

    return %dat;
}
|
|
838
|
|
839
|
|
840 =head1 cuts_after
|
|
841
|
|
842 Title : cuts_after
|
|
843 Usage : $num = $re->cuts_after();
|
|
844 Purpose : Sets/Gets an integer indicating the position of cleavage
|
|
845 : relative to the 5' end of the recognition sequence.
|
|
846 Returns : Integer
|
|
847 Argument : Integer (optional)
|
|
848 Throws : Exception if argument is non-numeric.
|
|
849 Access : Public
|
|
850 Comments : This method is only needed to change the cuts at
|
|
851 : position. This data is automatically set during
|
|
852 : construction.
|
|
853
|
|
854 See Also : L<_make_standard()|_make_standard>, L<_make_custom()|_make_custom>
|
|
855
|
|
856 =cut
|
|
857
|
|
858 #'
|
|
859 #---------------
|
|
sub cuts_after {
    # Gets, or with an argument sets, the position of cleavage relative to
    # the 5' end of the recognition sequence.
    #
    #   $num (optional) : new cut position; must be an integer (an
    #                     optional sign followed by digits, surrounding
    #                     whitespace tolerated).
    #
    # Returns the current value of $self->{'_cuts_after'}.
    # Throws if an argument is supplied that is not an integer.
    my $self = shift;

    if (@_) {
        my $num = shift;

        # Match the text of the value rather than comparing it to zero:
        # a numeric comparison lets strings such as '3abc' through.
        unless (defined $num && $num =~ /\A\s*[-+]?\d+\s*\z/) {
            my $shown = defined $num ? $num : 'undef';
            $self->throw("The cuts_after position must be an integer ($shown)");
        }
        $self->{'_cuts_after'} = $num;
    }

    return $self->{'_cuts_after'};
}
|
|
871
|
|
872
|
|
873
|
|
874 =head1 site
|
|
875
|
|
876 Title : site
|
|
877 Usage : $re->site();
|
|
878 Purpose : Gets the recognition sequence for the enzyme.
|
|
879 Example : $seq_string = $re->site();
|
|
880 Returns : String containing recognition sequence indicating
|
|
881 : cleavage site as in 'G^AATTC'.
|
|
882 Argument : n/a
|
|
883 Throws : n/a
|
|
884 Comments : If you want a simple string representing the site without
|
|
885 any '^', use the string() method.
|
|
886
|
|
887 See Also : L<string()|string>
|
|
888
|
|
889 =cut
|
|
890
|
|
891 #---------
|
|
sub site {
    # Returns the recognition sequence as a string, with a '^' inserted
    # after the cut position when that position is greater than zero
    # (e.g. 'G^AATTC'). A cut position at or beyond the end of the site
    # puts the '^' last; a position of zero or less yields the plain
    # sequence with no '^'.
    my ($self) = @_;

    my $site_seq = $self->seq;
    my $cut_pos  = $self->cuts_after;

    # No marker for a cut at (or before) the 5' end.
    return $site_seq->seq unless $cut_pos > 0;

    my $site_len = $site_seq->length;
    if ($cut_pos >= $site_len) {
        return $site_seq->seq . '^';
    }

    my $upstream   = $site_seq->subseq(1, $cut_pos);
    my $downstream = $site_seq->subseq($cut_pos + 1, $site_len);
    return $upstream . '^' . $downstream;
}
|
|
907
|
|
908
|
|
909 =head1 seq
|
|
910
|
|
911 Title : seq
|
|
912 Usage : $re->seq();
|
|
913 Purpose : Get the Bio::PrimarySeq.pm-derived object representing
|
|
914 : the recognition sequence
|
|
915 Returns : Bio::PrimarySeq object
|
|
916 Argument : n/a
|
|
917 Throws : n/a
|
|
918
|
|
919 See Also : L<string()|string>, L<revcom()|revcom>
|
|
920
|
|
921 =cut
|
|
922
|
|
923 #---------
|
|
sub seq {
    # Accessor for the object held in $self->{'_seq'} (the recognition
    # sequence object, not a plain string).
    my ($self) = @_;
    return $self->{'_seq'};
}
|
|
925 #---------
|
|
926
|
|
927
|
|
928
|
|
929 =head1 string
|
|
930
|
|
931 Title : string
|
|
932 Usage : $re->string();
|
|
933 Purpose : Get a string representing the recognition sequence.
|
|
934 Returns : String. Does NOT contain a '^' representing the cut location
|
|
935 as returned by the site() method
|
|
936 Argument : n/a
|
|
937 Throws : n/a
|
|
938 Comments : Delegates to the Bio::PrimarySeq-derived object.
|
|
939
|
|
940 See Also : L<seq()|seq>, L<site()|site>, L<revcom()|revcom>
|
|
941
|
|
942 =cut
|
|
943
|
|
944 #-----------
|
|
sub string {
    # Returns whatever the stored sequence object's seq() method yields,
    # i.e. the recognition sequence without any '^' cut marker.
    my ($self) = @_;
    my $site_seq = $self->{'_seq'};
    return $site_seq->seq;
}
|
|
946 #-----------
|
|
947
|
|
948
|
|
949
|
|
950 =head1 revcom
|
|
951
|
|
952 Title : revcom
|
|
953 Usage : $re->revcom();
|
|
954 Purpose : Get a string representing the reverse complement of
|
|
955 : the recognition sequence.
|
|
956 Returns : String
|
|
957 Argument : n/a
|
|
958 Throws : n/a
|
|
959 Comments : Delegates to the Bio::PrimarySeq.pm-derived object, but needs to
|
|
960 get out the string from it, as now Bio::PrimarySeq->revcom makes a
|
|
961 Bio::PrimarySeq object
|
|
962
|
|
963 See Also : L<seq()|seq>, L<string()|string>
|
|
964
|
|
965 =cut
|
|
966
|
|
967 #-----------
|
|
sub revcom {
    # Returns the reverse complement of the recognition sequence as a
    # string: asks the stored sequence object for its revcom() object and
    # returns that object's seq().
    my ($self) = @_;
    my $rc_seq = $self->{'_seq'}->revcom;
    return $rc_seq->seq();
}
|
|
969 #-----------
|
|
970
|
|
971
|
|
972
|
|
973 =head1 cut_seq
|
|
974
|
|
975 Title : cut_seq
|
|
976 Usage : $re->cut_seq(<sequence object>);
|
|
977 Purpose : Conceptually cut or "digest" a DNA sequence with the given enzyme.
|
|
978 Example : $string = $re->cut_seq(<sequence object>);
|
|
979 Returns : List of strings containing the resulting fragments.
|
|
980 Argument : Reference to a Bio::PrimarySeq.pm-derived object.
|
|
981 Throws : Exception if argument is not an object.
|
|
982 : (Does not yet verify that it is derived from Bio::PrimarySeq.pm.)
|
|
983 Comments : Strategy relies on Perl's built-in split() function.
|
|
984 : Since split removes the recognition pattern, the resulting
|
|
985 : fragments are repaired after split()-ing.
|
|
986 : A side-effect of this is that for sites with ambiguous
|
|
987 : recognition sequence (i.e., containing N), the fragments
|
|
988 : will contain ambiguity characters instead of AGCT.
|
|
989 :
|
|
990 : There is currently no support for partial digestions.
|
|
991 : There is currently no support for circular sequences.
|
|
992 : (This should just involve merging the first and last frag
|
|
993 : if $seqObj->is_circular returns true).
|
|
994
|
|
995 =cut
|
|
996
|
|
997 #'
|
|
998 #-------------
|
|
sub cut_seq {
#-------------
    # Conceptually digest a sequence with this enzyme.
    #
    # Argument : $seqObj - object for which isa('Bio::PrimarySeqI') is true.
    # Returns  : List of fragment strings (in scalar context, their count).
    # Throws   : Via $self->throw if $seqObj is missing, is not a
    #            reference, or is not a Bio::PrimarySeqI.
    # Comments : The target is split() on the recognition pattern and the
    #            two halves of the site are then glued back onto the
    #            neighbouring fragments. The halves always come from this
    #            enzyme's own site string, so fragments produced by an
    #            ambiguous site, or by a match on the reverse-complement
    #            alternative, carry the forward site's characters rather
    #            than the bases actually matched.
    my ($self, $seqObj) = @_;

    # eval{} keeps an unblessed reference (e.g. an array ref) from dying
    # inside ->isa with an unrelated "unblessed reference" error.
    unless ( ref($seqObj) && eval { $seqObj->isa('Bio::PrimarySeqI') } ) {
        $self->throw( "Can't cut sequence. Missing or invalid object. ".
                      "seqObj: ".(defined $seqObj ? $seqObj : 'undef') );
    }

    my $reSeq      = $self->seq;
    my $cuts_after = $self->{'_cuts_after'};

    # Part of the site that stays on the 3' end of the upstream fragment
    # and part that starts the downstream fragment.
    my ($site_3prime_seq, $site_5prime_seq);
    if ($cuts_after == 0) {
        $site_3prime_seq = '';
        $site_5prime_seq = $reSeq->seq();
    }
    elsif ($cuts_after == $reSeq->length) {
        $site_3prime_seq = $reSeq->seq();
        $site_5prime_seq = '';
    }
    else {
        $site_3prime_seq = $reSeq->subseq(1, $cuts_after);
        $site_5prime_seq = $reSeq->subseq($cuts_after + 1, $reSeq->length);
    }

    $self->debug("3' site: $site_3prime_seq\n5' site: $site_5prime_seq\n");

    # Build the split pattern; a non-palindromic site must also be
    # searched for as its reverse complement.
    my $seq = uc $self->_expanded_string;
    if (!$self->palindromic and $self->name ne 'N') {
        my $revseq = $self->_expanded_string( $reSeq->revcom->seq );
        $seq .= '|'.uc($revseq);
    }
    $self->debug(sprintf("$ID: site seq: %s\n\n", $seq));
    $self->debug(sprintf("$ID: splitting %s\n\n", $reSeq->seq));

    # LIMIT of -1 keeps trailing empty fields. Without it a site at the
    # very end of the target is silently dropped and never re-attached.
    my @re_frags = split(/$seq/i, $seqObj->seq, -1);

    $self->debug("$ID: cut_seq, ".scalar @re_frags. " fragments.\n");

    ## Re-attach the split recognition site back to the frags
    ## since perl zapped them in the split() call.
    for my $i (0 .. $#re_frags) {
        $re_frags[$i] .= $site_3prime_seq                  if $i < $#re_frags;
        $re_frags[$i]  = $site_5prime_seq . $re_frags[$i]  if $i > 0;
    }

    # When the enzyme cuts after the last base of a site that ends the
    # target, the final piece is empty; do not report it as a fragment.
    pop @re_frags if @re_frags and $re_frags[-1] eq '';

    return @re_frags;
}
|
|
1047
|
|
1048 =head1 cut_locations
|
|
1049
|
|
1050 Title : cut_locations
|
|
1051 Usage : my $locations = $re->cut_locations(<sequence_object>);
|
|
1052 Purpose : Report the location of the recognition site(s) within
|
|
1053 : an input sequence.
|
|
1054 Example : my $locations = $re->cut_locations($seqObj);
|
|
1055 Returns : Arrayref of 0-based start positions of each recognition site match
|
|
1056 Argument : Reference to a Bio::PrimarySeqI-derived sequence object.
|
|
1057 Throws : n/a
|
|
1058 Comments :
|
|
1059
|
|
1060 =cut
|
|
1061
|
|
1062 #-----------------
|
|
sub cut_locations {
#-----------------
    # Report where the recognition site occurs in a sequence.
    #
    # Argument : $seqobj - object whose seq() method returns the target
    #            string.
    # Returns  : Array reference of 0-based offsets, one per
    #            (case-insensitive, non-overlapping) match of the
    #            expanded recognition site. The offset is the start of
    #            the match, not the position of the cut within it.
    my ($self, $seqobj) = @_;

    my $site = $self->_expanded_string;
    my $seq  = $seqobj->seq;

    my @locations;
    # (?:...) keeps the pattern non-empty even if $site is '', since an
    # empty pattern would make Perl reuse the last successful regex.
    while ( $seq =~ /(?:$site)/ig ) {
        # $-[0] is the offset of the start of the current match. It gives
        # the same number as length($`) without the program-wide regex
        # slowdown that mentioning $` causes on older perls.
        push @locations, $-[0];
    }
    return \@locations;
}
|
|
1077
|
|
# Purpose : Expand nucleotide ambiguity codes to their representative letters
#           (written as regular-expression fragments: '.' or a [...] class).
# Argument: (optional) the string to be expanded. If not supplied, uses
#           the string returned by $self->string().
# Returns : String. The input is returned unexpanded when the enzyme's
#           name is 'N'.
sub _expanded_string {
    my ($self, $str) = @_;

    $str = $self->string unless $str;

    return $str if $self->name eq 'N';

    # Upper-case ambiguity code => regex fragment matching its bases.
    my %fragment_for = (
        'N' => '.',      'X' => '.',
        'R' => '[AG]',   'Y' => '[CT]',
        'S' => '[GC]',   'W' => '[AT]',
        'M' => '[AC]',   'K' => '[TG]',
        'B' => '[CGT]',  'D' => '[AGT]',
        'H' => '[ACT]',  'V' => '[ACG]',
    );

    # One pass is enough: no fragment contains a character that is
    # itself an ambiguity code, so nothing is expanded twice.
    $str =~ s/([NXRYSWMKBDHV])/$fragment_for{$1}/g;

    return $str;
}
|
|
1102
|
|
1103
|
|
1104 =head1 annotate_seq
|
|
1105
|
|
1106 Title : annotate_seq
|
|
1107 Usage : $re->annotate_seq(<sequence_object>);
|
|
1108 Purpose : Identify the location of the recognition site(s) within
|
|
1109 : an input sequence. Uses HTML.
|
|
1110 Example : $annot_seq = $re->annotate_seq($seqObj);
|
|
1111 Returns : String containing the annotated sequence.
|
|
1112 Argument : Reference to a Bio::PrimarySeq.pm-derived sequence object.
|
|
1113 Throws : n/a
|
|
1114 Comments : The annotated sequence must be viewed with a web
|
|
1115 : browser to see the location(s) of the recognition site(s).
|
|
1116
|
|
1117 =cut
|
|
1118
|
|
1119 #-----------------
|
|
sub annotate_seq {
#-----------------
    # Mark every occurrence of the recognition site in a sequence with
    # HTML bold tags.
    #
    # Argument : $seqObj - object whose seq() method returns the target
    #            string.
    # Returns  : Copy of the sequence string with each match wrapped in
    #            <b>...</b>.
    my ($self, $seqObj) = @_;

    my $site = $self->_expanded_string;
    my $seq  = $seqObj->seq;

    # Re-insert the text that actually matched ($1). Interpolating $site
    # into the replacement would write the regex fragment (e.g. "GA.TC")
    # into the sequence whenever the site contains ambiguity codes.
    # Matching is case-insensitive, as in cut_seq() and cut_locations().
    $seq =~ s{($site)}{<b>$1</b>}ig;

    return $seq;
}
|
|
1130
|
|
1131
|
|
1132 =head1 palindromic
|
|
1133
|
|
1134 Title : palindromic
|
|
1135 Usage : $re->palindromic();
|
|
1136 Purpose : Determines if the recognition sequence is palindromic
|
|
1137 : for the current restriction enzyme.
|
|
1138 Returns : Boolean
|
|
1139 Argument : n/a
|
|
1140 Throws : n/a
|
|
1141 Access : Public
|
|
1142 Comments : A palindromic site (EcoRI): 5-GAATTC-3
|
|
1143 : 3-CTTAAG-5
|
|
1144
|
|
1145 =cut
|
|
1146
|
|
1147 #----------------
|
|
sub palindromic {
#----------------
    # True when the recognition sequence reads the same as its reverse
    # complement, as reported by string() and revcom().
    my ($self) = @_;
    my $forward = $self->string;
    my $reverse = $self->revcom;
    return $forward eq $reverse;
}
|
|
1153
|
|
1154
|
|
1155
|
|
1156 =head1 is_available
|
|
1157
|
|
1158 Title : is_available
|
|
1159 Usage : $re->is_available(<string containing name of enzyme>);
|
|
1160 Purpose : Determine if an enzyme is available (to this module).
|
|
1161 : (see the package lexical %RE).
|
|
1162 Example : $re->is_available('EcoRI');
|
|
1163 : &Bio::Tools::RestrictionEnzyme::is_available($object,'EcoRI');
|
|
1164 Returns : Boolean
|
|
1165 Argument : String
|
|
1166 Throws : n/a
|
|
1167 Comments : This method does NOT give information about
|
|
1168 : commercial availability (yet).
|
|
1169 : Enzyme names are CASE SENSITIVE.
|
|
1170
|
|
1171 See Also : L<available_list()|available_list>
|
|
1172
|
|
1173 =cut
|
|
1174
|
|
1175 #----------------
|
|
sub is_available {
#----------------
    # True when %RE has an entry keyed by exactly $name (the lookup is
    # a plain hash key test, so it is case sensitive).
    my $self   = shift;
    my ($name) = @_;
    return exists $RE{$name};
}
|
|
1181
|
|
1182 #--------------
|
|
sub available {
#--------------
    # Deprecated alias for is_available(): prints a deprecation notice
    # to STDERR, then returns whatever is_available($name) returns.
    my ($self, $name) = @_;

    # The braces are required. A bare "$ID::" inside a double-quoted
    # string is parsed as the package-qualified variable $ID:: and not
    # as $ID followed by "::", so the module id never appeared.
    print STDERR "\nDeprecated method: ${ID}::available(); ".
                 "use is_available() instead.\n";

    return $self->is_available($name);
}
|
|
1190
|
|
1191
|
|
1192 =head2 name
|
|
1193
|
|
1194 Title : name
|
|
1195 Usage : $obj->name($newval)
|
|
1196 Function:
|
|
1197 Example :
|
|
1198 Returns : value of name
|
|
1199 Args : newvalue (optional)
|
|
1200
|
|
1201
|
|
1202 =cut
|
|
1203
|
|
sub name {
    # Combined getter/setter for the 'name' slot of the object hash.
    # A defined argument is stored first; the current value is always
    # returned.
    my $obj = shift;
    $obj->{'name'} = $_[0] if defined $_[0];
    return $obj->{'name'};
}
|
|
1212
|
|
1213 =head1 available_list
|
|
1214
|
|
1215 Title : available_list
|
|
1216 Usage : $re->available_list([<integer>]);
|
|
1217 Purpose : Retrieve a list of currently available enzymes.
|
|
1218 Example : @all = $re->available_list(); ## All enzymes
|
|
1219 : @six_cutters = $re->available_list(6); ## All 6-cutters
|
|
1220 Returns : List of strings
|
|
1221 Argument : Integer (optional)
|
|
1222 Throws : n/a
|
|
1223 Comments : This method may be more appropriate for a REData.pm class.
|
|
1224
|
|
1225 See Also : L<is_available()|is_available>
|
|
1226
|
|
1227 =cut
|
|
1228
|
|
1229 #-------------------
|
|
sub available_list {
#-------------------
    # List the enzyme names held in @RE_available.
    #
    # Argument : $size (optional) - when given and not 'all', only
    #            enzymes whose first whitespace-separated field in
    #            $RE{name} has this length are returned.
    # Returns  : List of names.
    my ($self, $size) = @_;

    # No size, a false size, or the literal 'all' selects everything.
    return @RE_available if !$size or $size eq 'all';

    my @names = grep {
        my $first_field = (split /\s/, $RE{$_})[0];
        length($first_field) == $size;
    } @RE_available;

    return @names;
}
|
|
1246
|
|
1247 1;
|