Mercurial > repos > mahtabm > ensemb_rep_gvl
comparison variant_effect_predictor/Bio/Variation/VariantI.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2bc9b66ada89 |
---|---|
1 # $Id: VariantI.pm,v 1.12 2002/10/22 07:38:49 lapp Exp $ | |
2 # | |
3 # BioPerl module for Bio::Variation::VariantI | |
4 # | |
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> | |
6 # | |
7 # Copyright Heikki Lehvaslaiho | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::Variation::VariantI - Sequence Change SeqFeature abstract class | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 #get Bio::Variant::VariantI somehow | |
20 print $var->restriction_changes, "\n"; | |
21 foreach $allele ($var->each_Allele) { | |
22 #work on Bio::Variation::Allele objects | |
23 } | |
24 | |
25 =head1 DESCRIPTION | |
26 | |
27 This superclass defines common methods to basic sequence changes. The | |
28 instantiable classes Bio::Variation::DNAMutation, | |
29 Bio::Variation::RNAChange and Bio::Variation::AAChange use them. | |
30 See L<Bio::Variation::DNAMutation>, L<Bio::Variation::RNAChange>, | |
31 and L<Bio::Variation::AAChange> for more information. | |
32 | |
33 These classes store information, heavy computation to detemine allele | |
34 sequences is done elsewhere. | |
35 | |
36 The database cross-references are implemented as | |
37 Bio::Annotation::DBLink objects. The methods to access them are | |
38 defined in Bio::DBLinkContainerI. See L<Bio::Annotation::DBLink> | |
39 and L<Bio::DBLinkContainerI> for details. | |
40 | |
41 Bio::Variation::VariantI redifines and extends | |
42 Bio::SeqFeature::Generic for sequence variations. This class | |
43 describes specific sequence change events. These events are always | |
44 from a specific reference sequence to something different. See | |
45 L<Bio::SeqFeature::Generic> for more information. | |
46 | |
47 IMPORTANT: The notion of reference sequence permeates all | |
48 Bio::Variation classes. This is especially important to remember when | |
49 dealing with Alleles. In a polymorphic site, there can be a large | |
50 number of alleles. One of then has to be selected to be the reference | |
51 allele (allele_ori). ALL the rest has to be passed to the Variant | |
52 using the method add_Allele, including the mutated allele in a | |
53 canonical mutation. The IO modules and generated attributes depend on | |
54 it. They ignore the allele linked to using allele_mut and circulate | |
55 each Allele returned by each_Allele into allele_mut and calculate | |
56 the changes between that and allele_ori. | |
57 | |
58 | |
59 =head1 FEEDBACK | |
60 | |
61 =head2 Mailing Lists | |
62 | |
63 User feedback is an integral part of the evolution of this and other | |
64 Bioperl modules. Send your comments and suggestions preferably to the | |
65 Bioperl mailing lists Your participation is much appreciated. | |
66 | |
67 bioperl-l@bioperl.org - General discussion | |
68 http://bio.perl.org/MailList.html - About the mailing lists | |
69 | |
70 =head2 Reporting Bugs | |
71 | |
72 report bugs to the Bioperl bug tracking system to help us keep track | |
73 the bugs and their resolution. Bug reports can be submitted via | |
74 email or the web: | |
75 | |
76 bioperl-bugs@bio.perl.org | |
77 http://bugzilla.bioperl.org/ | |
78 | |
79 =head1 AUTHOR - Heikki Lehvaslaiho | |
80 | |
81 Email: heikki@ebi.ac.uk | |
82 Address: | |
83 | |
84 EMBL Outstation, European Bioinformatics Institute | |
85 Wellcome Trust Genome Campus, Hinxton | |
86 Cambs. CB10 1SD, United Kingdom | |
87 | |
88 | |
89 =head1 APPENDIX | |
90 | |
91 The rest of the documentation details each of the object | |
92 methods. Internal methods are usually preceded with a _ | |
93 | |
94 =cut | |
95 | |
96 | |
97 # Let the code begin... | |
98 | |
99 | |
100 package Bio::Variation::VariantI; | |
101 $VERSION=1.0; | |
102 use vars qw(@ISA); | |
103 use strict; | |
104 use Bio::Root::Root; | |
105 use Bio::DBLinkContainerI; | |
106 # Object preamble - inheritance | |
107 | |
108 use Bio::SeqFeature::Generic; | |
109 @ISA = qw(Bio::Root::Root Bio::SeqFeature::Generic Bio::DBLinkContainerI ); | |
110 | |
111 =head2 id | |
112 | |
113 Title : id | |
114 Usage : $obj->id | |
115 Function: | |
116 | |
117 Read only method. Returns the id of the variation object. | |
118 The id is the id of the first DBLink object attached to this object. | |
119 | |
120 Example : | |
121 Returns : scalar | |
122 Args : none | |
123 | |
124 =cut | |
125 | |
126 sub id { | |
127 my ($self) = @_; | |
128 my @ids = $self->each_DBLink; | |
129 my $id = $ids[0] if scalar @ids > 0; | |
130 return $id->database. "::". $id->primary_id if $id; | |
131 } | |
132 | |
133 | |
134 =head2 add_Allele | |
135 | |
136 Title : add_Allele | |
137 Usage : $self->add_Allele($allele) | |
138 Function: | |
139 | |
140 Adds one Bio::Variation::Allele into the list of alleles. | |
141 Note that the method forces the convention that nucleotide | |
142 sequence is in lower case and amino acds are in upper | |
143 case. | |
144 | |
145 Example : | |
146 Returns : 1 when succeeds, 0 for failure. | |
147 Args : Allele object | |
148 | |
149 =cut | |
150 | |
151 | |
152 sub add_Allele { | |
153 my ($self,$value) = @_; | |
154 if (defined $value) { | |
155 if( ! $value->isa('Bio::Variation::Allele') ) { | |
156 my $com = ref $value; | |
157 $self->throw("Is not a Allele object but a [$com]"); | |
158 return 0; | |
159 } else { | |
160 if ( $self->isa('Bio::Variation::AAChange') ) { | |
161 $value->seq( uc $value->seq) if $value->seq; | |
162 } else { | |
163 $value->seq( lc $value->seq) if $value->seq; | |
164 } | |
165 push(@{$self->{'alleles'}},$value); | |
166 $self->allele_mut($value); #???? | |
167 return 1; | |
168 } | |
169 } else { | |
170 return 0; | |
171 } | |
172 } | |
173 | |
174 | |
175 =head2 each_Allele | |
176 | |
177 Title : alleles | |
178 Usage : $obj->each_Allele(); | |
179 Function: | |
180 | |
181 Returns a list of Bio::Variation::Allele objects | |
182 | |
183 Example : | |
184 Returns : list of Alleles | |
185 Args : none | |
186 | |
187 =cut | |
188 | |
189 sub each_Allele{ | |
190 my ($self,@args) = @_; | |
191 return @{$self->{'alleles'}}; | |
192 } | |
193 | |
194 | |
195 | |
196 =head2 isMutation | |
197 | |
198 Title : isMutation | |
199 Usage : print join('/', $obj->each_Allele) if not $obj->isMutation; | |
200 Function: | |
201 | |
202 Returns or sets the boolean value indicating that the | |
203 variant descibed is a canonical mutation with two alleles | |
204 assinged to be the original (wild type) allele and mutated | |
205 allele, respectively. If this value is not set, it is | |
206 assumed that the Variant descibes polymorphisms. | |
207 | |
208 Returns : a boolean | |
209 | |
210 =cut | |
211 | |
212 sub isMutation { | |
213 my ($self,$value) = @_; | |
214 if (defined $value) { | |
215 if ($value ) { | |
216 $self->{'isMutation'} = 1; | |
217 } else { | |
218 $self->{'isMutation'} = 0; | |
219 } | |
220 } | |
221 return $self->{'isMutation'}; | |
222 } | |
223 | |
224 | |
225 =head2 allele_ori | |
226 | |
227 Title : allele_ori | |
228 Usage : $obj->allele_ori(); | |
229 Function: | |
230 | |
231 Links to and returns the Bio::Variation::Allele object. | |
232 If value is not set, returns false. All other Alleles are | |
233 compared to this. | |
234 | |
235 Amino acid sequences are stored in upper case characters, | |
236 others in lower case. | |
237 | |
238 Example : | |
239 Returns : string | |
240 Args : string | |
241 | |
242 See L<Bio::Variation::Allele> for more. | |
243 | |
244 =cut | |
245 | |
246 sub allele_ori { | |
247 my ($self,$value) = @_; | |
248 if( defined $value) { | |
249 if ( ! ref $value || ! $value->isa('Bio::Variation::Allele')) { | |
250 $self->throw("Value is not Bio::Variation::Allele but [$value]"); | |
251 } else { | |
252 if ( $self->isa('Bio::Variation::AAChange') ) { | |
253 $value->seq( uc $value->seq) if $value->seq; | |
254 } else { | |
255 $value->seq( lc $value->seq) if $value->seq; | |
256 } | |
257 $self->{'allele_ori'} = $value; | |
258 } | |
259 } | |
260 return $self->{'allele_ori'}; | |
261 } | |
262 | |
263 | |
264 =head2 allele_mut | |
265 | |
266 Title : allele_mut | |
267 Usage : $obj->allele_mut(); | |
268 Function: | |
269 | |
270 Links to and returns the Bio::Variation::Allele | |
271 object. Sets and returns the mutated allele sequence. | |
272 If value is not set, returns false. | |
273 | |
274 Amino acid sequences are stored in upper case characters, | |
275 others in lower case. | |
276 | |
277 Example : | |
278 Returns : string | |
279 Args : string | |
280 | |
281 See L<Bio::Variation::Allele> for more. | |
282 | |
283 =cut | |
284 | |
285 | |
286 sub allele_mut { | |
287 my ($self,$value) = @_; | |
288 if( defined $value) { | |
289 if ( ! ref $value || ! $value->isa('Bio::Variation::Allele')) { | |
290 $self->throw("Value is not Bio::Variation::Allele but [$value]"); | |
291 } else { | |
292 if ( $self->isa('Bio::Variation::AAChange') ) { | |
293 $value->seq( uc $value->seq) if $value->seq; | |
294 } else { | |
295 $value->seq( lc $value->seq) if $value->seq; | |
296 } | |
297 $self->{'allele_mut'} = $value; | |
298 } | |
299 } | |
300 return $self->{'allele_mut'}; | |
301 } | |
302 | |
303 =head2 length | |
304 | |
305 Title : length | |
306 Usage : $obj->length(); | |
307 Function: | |
308 | |
309 Sets and returns the length of the affected original | |
310 allele sequence. If value is not set, returns false == 0. | |
311 | |
312 Value 0 means that the variant position is before the | |
313 start=end sequence position. (Value 1 would denote a point | |
314 mutation). This follows the convension to report an | |
315 insertion (2insT) in equivalent way to a corresponding | |
316 deletion (2delT) (Think about indel polymorpism ATC <=> AC | |
317 where the origianal state is not known ). | |
318 | |
319 Example : | |
320 Returns : string | |
321 Args : string | |
322 | |
323 =cut | |
324 | |
325 | |
326 sub length { | |
327 my ($self,$value) = @_; | |
328 if ( defined $value) { | |
329 $self->{'length'} = $value; | |
330 } | |
331 if ( ! exists $self->{'length'} ) { | |
332 return 0; | |
333 } | |
334 return $self->{'length'}; | |
335 } | |
336 | |
337 =head2 upStreamSeq | |
338 | |
339 Title : upStreamSeq | |
340 Usage : $obj->upStreamSeq(); | |
341 Function: | |
342 | |
343 Sets and returns upstream flanking sequence string. If | |
344 value is not set, returns false. The sequence should be | |
345 >=25 characters long, if possible. | |
346 | |
347 Example : | |
348 Returns : string or false | |
349 Args : string | |
350 | |
351 =cut | |
352 | |
353 | |
354 sub upStreamSeq { | |
355 my ($self,$value) = @_; | |
356 if( defined $value) { | |
357 $self->{'upstreamseq'} = $value; | |
358 } | |
359 return $self->{'upstreamseq'}; | |
360 } | |
361 | |
362 | |
363 =head2 dnStreamSeq | |
364 | |
365 Title : dnStreamSeq | |
366 Usage : $obj->dnStreamSeq(); | |
367 Function: | |
368 | |
369 Sets and returns dnstream flanking sequence string. If | |
370 value is not set, returns false. The sequence should be | |
371 >=25 characters long, if possible. | |
372 | |
373 Example : | |
374 Returns : string or false | |
375 Args : string | |
376 | |
377 =cut | |
378 | |
379 | |
380 sub dnStreamSeq { | |
381 my ($self,$value) = @_; | |
382 if( defined $value) { | |
383 $self->{'dnstreamseq'} = $value; | |
384 } | |
385 return $self->{'dnstreamseq'}; | |
386 | |
387 } | |
388 | |
389 | |
390 =head2 label | |
391 | |
392 Title : label | |
393 Usage : $obj->label(); | |
394 Function: | |
395 | |
396 Sets and returns mutation event label(s). If value is not | |
397 set, or no argument is given returns false. Each | |
398 instantiable class needs to implement this method. Valid | |
399 values are listed in 'Mutation event controlled vocabulary' in | |
400 http://www.ebi.ac.uk/mutations/recommendations/mutevent.html. | |
401 | |
402 Example : | |
403 Returns : string | |
404 Args : string | |
405 | |
406 =cut | |
407 | |
408 | |
409 sub label { | |
410 my ($self,$value) = @_; | |
411 $self->throw("[$self] has not implemeted method 'label'"); | |
412 } | |
413 | |
414 | |
415 | |
416 =head2 status | |
417 | |
418 Title : status | |
419 Usage : $obj->status() | |
420 Function: | |
421 | |
422 Returns the status of the sequence change object. | |
423 Valid values are: 'suspected' and 'proven' | |
424 | |
425 Example : $obj->status('proven'); | |
426 Returns : scalar | |
427 Args : valid string (optional, for setting) | |
428 | |
429 | |
430 =cut | |
431 | |
432 | |
433 sub status { | |
434 my ($self,$value) = @_; | |
435 my %status = (suspected => 1, | |
436 proven => 1 | |
437 ); | |
438 | |
439 if( defined $value) { | |
440 $value = lc $value; | |
441 if ($status{$value}) { | |
442 $self->{'status'} = $value; | |
443 } | |
444 else { | |
445 $self->throw("$value is not valid status value!"); | |
446 } | |
447 } | |
448 if( ! exists $self->{'status'} ) { | |
449 return "$self"; | |
450 } | |
451 return $self->{'status'}; | |
452 } | |
453 | |
454 | |
455 =head2 proof | |
456 | |
457 Title : proof | |
458 Usage : $obj->proof() | |
459 Function: | |
460 | |
461 Returns the proof of the sequence change object. | |
462 Valid values are: 'computed' and 'experimental'. | |
463 | |
464 Example : $obj->proof('computed'); | |
465 Returns : scalar | |
466 Args : valid string (optional, for setting) | |
467 | |
468 | |
469 =cut | |
470 | |
471 | |
472 sub proof { | |
473 my ($self,$value) = @_; | |
474 my %proof = (computed => 1, | |
475 experimental => 1 | |
476 ); | |
477 | |
478 if( defined $value) { | |
479 $value = lc $value; | |
480 if ($proof{$value}) { | |
481 $self->{'proof'} = $value; | |
482 } else { | |
483 $self->throw("$value is not valid proof value!"); | |
484 } | |
485 } | |
486 return $self->{'proof'}; | |
487 } | |
488 | |
489 | |
490 =head2 region | |
491 | |
492 Title : region | |
493 Usage : $obj->region(); | |
494 Function: | |
495 | |
496 Sets and returns the name of the sequence region type or | |
497 protein domain at this location. If value is not set, | |
498 returns false. | |
499 | |
500 Example : | |
501 Returns : string | |
502 Args : string | |
503 | |
504 =cut | |
505 | |
506 | |
507 sub region { | |
508 my ($self,$value) = @_; | |
509 if( defined $value) { | |
510 $self->{'region'} = $value; | |
511 } | |
512 return $self->{'region'}; | |
513 } | |
514 | |
515 | |
516 =head2 region_value | |
517 | |
518 Title : region_value | |
519 Usage : $obj->region_value(); | |
520 Function: | |
521 | |
522 Sets and returns the name of the sequence region_value or | |
523 protein domain at this location. If value is not set, | |
524 returns false. | |
525 | |
526 Example : | |
527 Returns : string | |
528 Args : string | |
529 | |
530 =cut | |
531 | |
532 | |
533 sub region_value { | |
534 my ($self,$value) = @_; | |
535 if( defined $value) { | |
536 $self->{'region_value'} = $value; | |
537 } | |
538 return $self->{'region_value'}; | |
539 } | |
540 | |
541 =head2 region_dist | |
542 | |
543 Title : region_dist | |
544 Usage : $obj->region_dist(); | |
545 Function: | |
546 | |
547 Sets and returns the distance tot the closest region | |
548 (i.e. intro/exon or domain) boundary. If distance is not | |
549 set, returns false. | |
550 | |
551 Example : | |
552 Returns : integer | |
553 Args : integer | |
554 | |
555 =cut | |
556 | |
557 | |
558 sub region_dist { | |
559 my ($self,$value) = @_; | |
560 if( defined $value) { | |
561 if ( not $value =~ /^[+-]?\d+$/ ) { | |
562 $self->throw("[$value] for region_dist has to be an integer\n"); | |
563 } else { | |
564 $self->{'region_dist'} = $value; | |
565 } | |
566 } | |
567 return $self->{'region_dist'}; | |
568 } | |
569 | |
570 | |
571 =head2 numbering | |
572 | |
573 Title : numbering | |
574 Usage : $obj->numbering() | |
575 Function: | |
576 | |
577 Returns the numbering chema used locating sequnce features. | |
578 Valid values are: 'entry' and 'coding' | |
579 | |
580 Example : $obj->numbering('coding'); | |
581 Returns : scalar | |
582 Args : valid string (optional, for setting) | |
583 | |
584 | |
585 =cut | |
586 | |
587 | |
588 sub numbering { | |
589 my ($self,$value) = @_; | |
590 my %numbering = (entry => 1, | |
591 coding => 1 | |
592 ); | |
593 | |
594 if( defined $value) { | |
595 $value = lc $value; | |
596 if ($numbering{$value}) { | |
597 $self->{'numbering'} = $value; | |
598 } | |
599 else { | |
600 $self->throw("'$value' is not a valid for numbering!"); | |
601 } | |
602 } | |
603 if( ! exists $self->{'numbering'} ) { | |
604 return "$self"; | |
605 } | |
606 return $self->{'numbering'}; | |
607 } | |
608 | |
609 =head2 mut_number | |
610 | |
611 Title : mut_number | |
612 Usage : $num = $obj->mut_number; | |
613 : $num = $obj->mut_number($number); | |
614 Function: | |
615 | |
616 Returns or sets the number identifying the order in which the | |
617 mutation has been issued. Numbers shouldstart from 1. | |
618 If the number has never been set, the method will return '' | |
619 | |
620 If you want the output from IO modules look nice and, for | |
621 multivariant/allele variations, make sense you better set | |
622 this attribute. | |
623 | |
624 Returns : an integer | |
625 | |
626 =cut | |
627 | |
628 | |
629 sub mut_number { | |
630 my ($self,$value) = @_; | |
631 if (defined $value) { | |
632 $self->{'mut_number'} = $value; | |
633 } | |
634 unless (exists $self->{'mut_number'}) { | |
635 return (''); | |
636 } else { | |
637 return $self->{'mut_number'}; | |
638 } | |
639 } | |
640 | |
641 | |
642 =head2 SeqDiff | |
643 | |
644 Title : SeqDiff | |
645 Usage : $mutobj = $obj->SeqDiff; | |
646 : $mutobj = $obj->SeqDiff($objref); | |
647 Function: | |
648 | |
649 Returns or sets the link-reference to the umbrella | |
650 Bio::Variation::SeqDiff object. If there is no link, | |
651 it will return undef | |
652 | |
653 Note: Adding a variant into a SeqDiff object will | |
654 automatically set this value. | |
655 | |
656 Returns : an obj_ref or undef | |
657 | |
658 See L<Bio::Variation::SeqDiff> for more information. | |
659 | |
660 =cut | |
661 | |
662 sub SeqDiff { | |
663 my ($self,$value) = @_; | |
664 if (defined $value) { | |
665 if( ! $value->isa('Bio::Variation::SeqDiff') ) { | |
666 $self->throw("Is not a Bio::Variation::SeqDiff object but a [$value]"); | |
667 return (undef); | |
668 } | |
669 else { | |
670 $self->{'seqDiff'} = $value; | |
671 } | |
672 } | |
673 unless (exists $self->{'seqDiff'}) { | |
674 return (undef); | |
675 } else { | |
676 return $self->{'seqDiff'}; | |
677 } | |
678 } | |
679 | |
680 =head2 add_DBLink | |
681 | |
682 Title : add_DBLink | |
683 Usage : $self->add_DBLink($ref) | |
684 Function: adds a link object | |
685 Example : | |
686 Returns : | |
687 Args : | |
688 | |
689 | |
690 =cut | |
691 | |
692 | |
693 sub add_DBLink{ | |
694 my ($self,$com) = @_; | |
695 if( $com && ! $com->isa('Bio::Annotation::DBLink') ) { | |
696 $self->throw("Is not a link object but a [$com]"); | |
697 } | |
698 $com && push(@{$self->{'link'}},$com); | |
699 } | |
700 | |
701 =head2 each_DBLink | |
702 | |
703 Title : each_DBLink | |
704 Usage : foreach $ref ( $self->each_DBlink() ) | |
705 Function: gets an array of DBlink of objects | |
706 Example : | |
707 Returns : | |
708 Args : | |
709 | |
710 | |
711 =cut | |
712 | |
713 sub each_DBLink{ | |
714 my ($self) = @_; | |
715 | |
716 return @{$self->{'link'}}; | |
717 } | |
718 | |
719 =head2 restriction_changes | |
720 | |
721 Title : restriction_changes | |
722 Usage : $obj->restriction_changes(); | |
723 Function: | |
724 | |
725 Returns a string containing a list of restriction | |
726 enzyme changes of form +EcoRI, separated by | |
727 commas. Strings need to be valid restriction enzyme names | |
728 as stored in REBASE. allele_ori and allele_mut need to be assigned. | |
729 | |
730 Example : | |
731 Returns : string | |
732 Args : string | |
733 | |
734 =cut | |
735 | |
736 sub restriction_changes { | |
737 my ($self) = @_; | |
738 | |
739 if (not $self->{'re_changes'}) { | |
740 my %re = &_enzymes; | |
741 | |
742 # complain if used on AA data | |
743 if ($self->isa('Bio::Variation::AAChange')) { | |
744 $self->throw('Restriction enzymes do not bite polypeptides!'); | |
745 } | |
746 | |
747 #sanity checks | |
748 $self->warn('Upstream sequence is empty!') | |
749 if $self->upStreamSeq eq ''; | |
750 $self->warn('Downstream sequence is empty!') | |
751 if $self->dnStreamSeq eq ''; | |
752 # $self->warn('Original allele sequence is empty!') | |
753 # if $self->allele_ori eq ''; | |
754 # $self->warn('Mutated allele sequence is empty!') | |
755 # if $self->allele_mut eq ''; | |
756 | |
757 #reuse the non empty DNA level list at RNA level if the flanks are identical | |
758 #Hint: Check DNAMutation object first | |
759 if ($self->isa('Bio::Variation::RNAChange') and $self->DNAMutation and | |
760 $self->upStreamSeq eq $self->DNAMutation->upStreamSeq and | |
761 $self->dnStreamSeq eq $self->DNAMutation->dnStreamSeq and | |
762 $self->DNAMutation->restriction_changes ne '' ) { | |
763 $self->{'re_changes'} = $self->DNAMutation->restriction_changes; | |
764 } else { | |
765 | |
766 #maximum length of a type II restriction site in the current REBASE | |
767 my ($le_dn) = 15; | |
768 my ($le_up) = $le_dn; | |
769 | |
770 #reduce the flank lengths if the desired length is not available | |
771 $le_dn = CORE::length ($self->dnStreamSeq) if $le_dn > CORE::length ($self->dnStreamSeq); | |
772 $le_up = CORE::length ($self->upStreamSeq) if $le_up > CORE::length ($self->upStreamSeq); | |
773 | |
774 #Build sequence strings to compare | |
775 my ($oriseq, $mutseq); | |
776 $oriseq = $mutseq = substr($self->upStreamSeq, -$le_up, $le_up); | |
777 $oriseq .= $self->allele_ori->seq if $self->allele_ori->seq; | |
778 $mutseq .= $self->allele_mut->seq if $self->allele_mut->seq; | |
779 $oriseq .= substr($self->dnStreamSeq, 0, $le_dn); | |
780 $mutseq .= substr($self->dnStreamSeq, 0, $le_dn); | |
781 | |
782 # ... and their reverse complements | |
783 my $oriseq_rev = _revcompl ($oriseq); | |
784 my $mutseq_rev = _revcompl ($mutseq); | |
785 | |
786 # collect results into a string | |
787 my $rec = ''; | |
788 foreach my $enz (sort keys (%re)) { | |
789 my $site = $re{$enz}; | |
790 my @ori = ($oriseq=~ /$site/g); | |
791 my @mut = ($mutseq=~ /$site/g); | |
792 my @ori_r = ($oriseq_rev =~ /$site/g); | |
793 my @mut_r = ($mutseq_rev =~ /$site/g); | |
794 | |
795 $rec .= '+'. $enz. ", " | |
796 if (scalar @ori < scalar @mut) or (scalar @ori_r < scalar @mut_r); | |
797 $rec .= '-'. $enz. ", " | |
798 if (scalar @ori > scalar @mut) or (scalar @ori_r > scalar @mut_r); | |
799 | |
800 } | |
801 $rec = substr($rec, 0, CORE::length($rec) - 2) if $rec ne ''; | |
802 $self->{'re_changes'} = $rec; | |
803 } | |
804 } | |
805 return $self->{'re_changes'} | |
806 } | |
807 | |
808 | |
809 sub _revcompl { | |
810 # side effect: lower case letters | |
811 my ($seq) = shift; | |
812 | |
813 $seq = lc $seq; | |
814 $seq =~ tr/acgtrymkswhbvdnx/tgcayrkmswdvbhnx/; | |
815 return CORE::reverse $seq; | |
816 } | |
817 | |
818 | |
819 sub _enzymes { | |
820 #REBASE version 005 type2.005 | |
821 my %enzymes = ( | |
822 'AarI' => 'cacctgc', | |
823 'AatII' => 'gacgtc', | |
824 'AccI' => 'gt[ac][gt]ac', | |
825 'AceIII' => 'cagctc', | |
826 'AciI' => 'ccgc', | |
827 'AclI' => 'aacgtt', | |
828 'AcyI' => 'g[ag]cg[ct]c', | |
829 'AflII' => 'cttaag', | |
830 'AflIII' => 'ac[ag][ct]gt', | |
831 'AgeI' => 'accggt', | |
832 'AhaIII' => 'tttaaa', | |
833 'AloI' => 'gaac[acgt][acgt][acgt][acgt][acgt][acgt]tcc', | |
834 'AluI' => 'agct', | |
835 'AlwNI' => 'cag[acgt][acgt][acgt]ctg', | |
836 'ApaBI' => 'gca[acgt][acgt][acgt][acgt][acgt]tgc', | |
837 'ApaI' => 'gggccc', | |
838 'ApaLI' => 'gtgcac', | |
839 'ApoI' => '[ag]aatt[ct]', | |
840 'AscI' => 'ggcgcgcc', | |
841 'AsuI' => 'gg[acgt]cc', | |
842 'AsuII' => 'ttcgaa', | |
843 'AvaI' => 'c[ct]cg[ag]g', | |
844 'AvaII' => 'gg[at]cc', | |
845 'AvaIII' => 'atgcat', | |
846 'AvrII' => 'cctagg', | |
847 'BaeI' => 'ac[acgt][acgt][acgt][acgt]gta[ct]c', | |
848 'BalI' => 'tggcca', | |
849 'BamHI' => 'ggatcc', | |
850 'BbvCI' => 'cctcagc', | |
851 'BbvI' => 'gcagc', | |
852 'BbvII' => 'gaagac', | |
853 'BccI' => 'ccatc', | |
854 'Bce83I' => 'cttgag', | |
855 'BcefI' => 'acggc', | |
856 'BcgI' => 'cga[acgt][acgt][acgt][acgt][acgt][acgt]tgc', | |
857 'BciVI' => 'gtatcc', | |
858 'BclI' => 'tgatca', | |
859 'BetI' => '[at]ccgg[at]', | |
860 'BfiI' => 'actggg', | |
861 'BglI' => 'gcc[acgt][acgt][acgt][acgt][acgt]ggc', | |
862 'BglII' => 'agatct', | |
863 'BinI' => 'ggatc', | |
864 'BmgI' => 'g[gt]gccc', | |
865 'BplI' => 'gag[acgt][acgt][acgt][acgt][acgt]ctc', | |
866 'Bpu10I' => 'cct[acgt]agc', | |
867 'BsaAI' => '[ct]acgt[ag]', | |
868 'BsaBI' => 'gat[acgt][acgt][acgt][acgt]atc', | |
869 'BsaXI' => 'ac[acgt][acgt][acgt][acgt][acgt]ctcc', | |
870 'BsbI' => 'caacac', | |
871 'BscGI' => 'cccgt', | |
872 'BseMII' => 'ctcag', | |
873 'BsePI' => 'gcgcgc', | |
874 'BseRI' => 'gaggag', | |
875 'BseSI' => 'g[gt]gc[ac]c', | |
876 'BsgI' => 'gtgcag', | |
877 'BsiI' => 'cacgag', | |
878 'BsiYI' => 'cc[acgt][acgt][acgt][acgt][acgt][acgt][acgt]gg', | |
879 'BsmAI' => 'gtctc', | |
880 'BsmI' => 'gaatgc', | |
881 'Bsp1407I' => 'tgtaca', | |
882 'Bsp24I' => 'gac[acgt][acgt][acgt][acgt][acgt][acgt]tgg', | |
883 'BspGI' => 'ctggac', | |
884 'BspHI' => 'tcatga', | |
885 'BspLU11I' => 'acatgt', | |
886 'BspMI' => 'acctgc', | |
887 'BspMII' => 'tccgga', | |
888 'BsrBI' => 'ccgctc', | |
889 'BsrDI' => 'gcaatg', | |
890 'BsrI' => 'actgg', | |
891 'BstEII' => 'ggt[acgt]acc', | |
892 'BstXI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt]tgg', | |
893 'BtrI' => 'cacgtc', | |
894 'BtsI' => 'gcagtg', | |
895 'Cac8I' => 'gc[acgt][acgt]gc', | |
896 'CauII' => 'cc[cg]gg', | |
897 'Cfr10I' => '[ag]ccgg[ct]', | |
898 'CfrI' => '[ct]ggcc[ag]', | |
899 'CjeI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt]gt', | |
900 'CjePI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt][acgt]tc', | |
901 'ClaI' => 'atcgat', | |
902 'CviJI' => '[ag]gc[ct]', | |
903 'CviRI' => 'tgca', | |
904 'DdeI' => 'ct[acgt]ag', | |
905 'DpnI' => 'gatc', | |
906 'DraII' => '[ag]gg[acgt]cc[ct]', | |
907 'DraIII' => 'cac[acgt][acgt][acgt]gtg', | |
908 'DrdI' => 'gac[acgt][acgt][acgt][acgt][acgt][acgt]gtc', | |
909 'DrdII' => 'gaacca', | |
910 'DsaI' => 'cc[ag][ct]gg', | |
911 'Eam1105I' => 'gac[acgt][acgt][acgt][acgt][acgt]gtc', | |
912 'EciI' => 'ggcgga', | |
913 'Eco31I' => 'ggtctc', | |
914 'Eco47III' => 'agcgct', | |
915 'Eco57I' => 'ctgaag', | |
916 'EcoNI' => 'cct[acgt][acgt][acgt][acgt][acgt]agg', | |
917 'EcoRI' => 'gaattc', | |
918 'EcoRII' => 'cc[at]gg', | |
919 'EcoRV' => 'gatatc', | |
920 'Esp3I' => 'cgtctc', | |
921 'EspI' => 'gct[acgt]agc', | |
922 'FauI' => 'cccgc', | |
923 'FinI' => 'gggac', | |
924 'Fnu4HI' => 'gc[acgt]gc', | |
925 'FnuDII' => 'cgcg', | |
926 'FokI' => 'ggatg', | |
927 'FseI' => 'ggccggcc', | |
928 'GdiII' => 'cggcc[ag]', | |
929 'GsuI' => 'ctggag', | |
930 'HaeI' => '[at]ggcc[at]', | |
931 'HaeII' => '[ag]gcgc[ct]', | |
932 'HaeIII' => 'ggcc', | |
933 'HaeIV' => 'ga[ct][acgt][acgt][acgt][acgt][acgt][ag]tc', | |
934 'HgaI' => 'gacgc', | |
935 'HgiAI' => 'g[at]gc[at]c', | |
936 'HgiCI' => 'gg[ct][ag]cc', | |
937 'HgiEII' => 'acc[acgt][acgt][acgt][acgt][acgt][acgt]ggt', | |
938 'HgiJII' => 'g[ag]gc[ct]c', | |
939 'HhaI' => 'gcgc', | |
940 'Hin4I' => 'ga[cgt][acgt][acgt][acgt][acgt][acgt][acg]tc', | |
941 'HindII' => 'gt[ct][ag]ac', | |
942 'HindIII' => 'aagctt', | |
943 'HinfI' => 'ga[acgt]tc', | |
944 'HpaI' => 'gttaac', | |
945 'HpaII' => 'ccgg', | |
946 'HphI' => 'ggtga', | |
947 'Hpy178III' => 'tc[acgt][acgt]ga', | |
948 'Hpy188I' => 'tc[acgt]ga', | |
949 'Hpy99I' => 'cg[at]cg', | |
950 'KpnI' => 'ggtacc', | |
951 'Ksp632I' => 'ctcttc', | |
952 'MaeI' => 'ctag', | |
953 'MaeII' => 'acgt', | |
954 'MaeIII' => 'gt[acgt]ac', | |
955 'MboI' => 'gatc', | |
956 'MboII' => 'gaaga', | |
957 'McrI' => 'cg[ag][ct]cg', | |
958 'MfeI' => 'caattg', | |
959 'MjaIV' => 'gt[acgt][acgt]ac', | |
960 'MluI' => 'acgcgt', | |
961 'MmeI' => 'tcc[ag]ac', | |
962 'MnlI' => 'cctc', | |
963 'MseI' => 'ttaa', | |
964 'MslI' => 'ca[ct][acgt][acgt][acgt][acgt][ag]tg', | |
965 'MstI' => 'tgcgca', | |
966 'MwoI' => 'gc[acgt][acgt][acgt][acgt][acgt][acgt][acgt]gc', | |
967 'NaeI' => 'gccggc', | |
968 'NarI' => 'ggcgcc', | |
969 'NcoI' => 'ccatgg', | |
970 'NdeI' => 'catatg', | |
971 'NheI' => 'gctagc', | |
972 'NlaIII' => 'catg', | |
973 'NlaIV' => 'gg[acgt][acgt]cc', | |
974 'NotI' => 'gcggccgc', | |
975 'NruI' => 'tcgcga', | |
976 'NspBII' => 'c[ac]gc[gt]g', | |
977 'NspI' => '[ag]catg[ct]', | |
978 'PacI' => 'ttaattaa', | |
979 'Pfl1108I' => 'tcgtag', | |
980 'PflMI' => 'cca[acgt][acgt][acgt][acgt][acgt]tgg', | |
981 'PleI' => 'gagtc', | |
982 'PmaCI' => 'cacgtg', | |
983 'PmeI' => 'gtttaaac', | |
984 'PpiI' => 'gaac[acgt][acgt][acgt][acgt][acgt]ctc', | |
985 'PpuMI' => '[ag]gg[at]cc[ct]', | |
986 'PshAI' => 'gac[acgt][acgt][acgt][acgt]gtc', | |
987 'PsiI' => 'ttataa', | |
988 'PstI' => 'ctgcag', | |
989 'PvuI' => 'cgatcg', | |
990 'PvuII' => 'cagctg', | |
991 'RleAI' => 'cccaca', | |
992 'RsaI' => 'gtac', | |
993 'RsrII' => 'cgg[at]ccg', | |
994 'SacI' => 'gagctc', | |
995 'SacII' => 'ccgcgg', | |
996 'SalI' => 'gtcgac', | |
997 'SanDI' => 'ggg[at]ccc', | |
998 'SapI' => 'gctcttc', | |
999 'SauI' => 'cct[acgt]agg', | |
1000 'ScaI' => 'agtact', | |
1001 'ScrFI' => 'cc[acgt]gg', | |
1002 'SduI' => 'g[agt]gc[act]c', | |
1003 'SecI' => 'cc[acgt][acgt]gg', | |
1004 'SexAI' => 'acc[at]ggt', | |
1005 'SfaNI' => 'gcatc', | |
1006 'SfeI' => 'ct[ag][ct]ag', | |
1007 'SfiI' => 'ggcc[acgt][acgt][acgt][acgt][acgt]ggcc', | |
1008 'SgfI' => 'gcgatcgc', | |
1009 'SgrAI' => 'c[ag]ccgg[ct]g', | |
1010 'SimI' => 'gggtc', | |
1011 'SmaI' => 'cccggg', | |
1012 'SmlI' => 'ct[ct][ag]ag', | |
1013 'SnaBI' => 'tacgta', | |
1014 'SnaI' => 'gtatac', | |
1015 'SpeI' => 'actagt', | |
1016 'SphI' => 'gcatgc', | |
1017 'SplI' => 'cgtacg', | |
1018 'SrfI' => 'gcccgggc', | |
1019 'Sse232I' => 'cgccggcg', | |
1020 'Sse8387I' => 'cctgcagg', | |
1021 'Sse8647I' => 'agg[at]cct', | |
1022 'SspI' => 'aatatt', | |
1023 'Sth132I' => 'cccg', | |
1024 'StuI' => 'aggcct', | |
1025 'StyI' => 'cc[at][at]gg', | |
1026 'SwaI' => 'atttaaat', | |
1027 'TaqI' => 'tcga', | |
1028 'TaqII' => 'gaccga', | |
1029 'TatI' => '[at]gtac[at]', | |
1030 'TauI' => 'gc[cg]gc', | |
1031 'TfiI' => 'ga[at]tc', | |
1032 'TseI' => 'gc[at]gc', | |
1033 'Tsp45I' => 'gt[cg]ac', | |
1034 'Tsp4CI' => 'ac[acgt]gt', | |
1035 'TspEI' => 'aatt', | |
1036 'TspRI' => 'ca[cg]tg[acgt][acgt]', | |
1037 'Tth111I' => 'gac[acgt][acgt][acgt]gtc', | |
1038 'Tth111II' => 'caa[ag]ca', | |
1039 'UbaGI' => 'cac[acgt][acgt][acgt][acgt]gtg', | |
1040 'UbaPI' => 'cgaacg', | |
1041 'VspI' => 'attaat', | |
1042 'XbaI' => 'tctaga', | |
1043 'XcmI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt][acgt][acgt][acgt]tgg', | |
1044 'XhoI' => 'ctcgag', | |
1045 'XhoII' => '[ag]gatc[ct]', | |
1046 'XmaIII' => 'cggccg', | |
1047 'XmnI' => 'gaa[acgt][acgt][acgt][acgt]ttc' | |
1048 ); | |
1049 | |
1050 return %enzymes; | |
1051 } | |
1052 | |
1053 1; |