0
|
1 # $Id: VariantI.pm,v 1.12 2002/10/22 07:38:49 lapp Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::Variation::VariantI
|
|
4 #
|
|
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
|
|
6 #
|
|
7 # Copyright Heikki Lehvaslaiho
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Variation::VariantI - Sequence Change SeqFeature abstract class
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 #get Bio::Variant::VariantI somehow
|
|
20 print $var->restriction_changes, "\n";
|
|
21 foreach $allele ($var->each_Allele) {
|
|
22 #work on Bio::Variation::Allele objects
|
|
23 }
|
|
24
|
|
25 =head1 DESCRIPTION
|
|
26
|
|
27 This superclass defines common methods to basic sequence changes. The
|
|
28 instantiable classes Bio::Variation::DNAMutation,
|
|
29 Bio::Variation::RNAChange and Bio::Variation::AAChange use them.
|
|
30 See L<Bio::Variation::DNAMutation>, L<Bio::Variation::RNAChange>,
|
|
31 and L<Bio::Variation::AAChange> for more information.
|
|
32
|
|
33 These classes store information, heavy computation to detemine allele
|
|
34 sequences is done elsewhere.
|
|
35
|
|
36 The database cross-references are implemented as
|
|
37 Bio::Annotation::DBLink objects. The methods to access them are
|
|
38 defined in Bio::DBLinkContainerI. See L<Bio::Annotation::DBLink>
|
|
39 and L<Bio::DBLinkContainerI> for details.
|
|
40
|
|
41 Bio::Variation::VariantI redifines and extends
|
|
42 Bio::SeqFeature::Generic for sequence variations. This class
|
|
43 describes specific sequence change events. These events are always
|
|
44 from a specific reference sequence to something different. See
|
|
45 L<Bio::SeqFeature::Generic> for more information.
|
|
46
|
|
47 IMPORTANT: The notion of reference sequence permeates all
|
|
48 Bio::Variation classes. This is especially important to remember when
|
|
49 dealing with Alleles. In a polymorphic site, there can be a large
|
|
50 number of alleles. One of then has to be selected to be the reference
|
|
51 allele (allele_ori). ALL the rest has to be passed to the Variant
|
|
52 using the method add_Allele, including the mutated allele in a
|
|
53 canonical mutation. The IO modules and generated attributes depend on
|
|
54 it. They ignore the allele linked to using allele_mut and circulate
|
|
55 each Allele returned by each_Allele into allele_mut and calculate
|
|
56 the changes between that and allele_ori.
|
|
57
|
|
58
|
|
59 =head1 FEEDBACK
|
|
60
|
|
61 =head2 Mailing Lists
|
|
62
|
|
63 User feedback is an integral part of the evolution of this and other
|
|
64 Bioperl modules. Send your comments and suggestions preferably to the
|
|
65 Bioperl mailing lists Your participation is much appreciated.
|
|
66
|
|
67 bioperl-l@bioperl.org - General discussion
|
|
68 http://bio.perl.org/MailList.html - About the mailing lists
|
|
69
|
|
70 =head2 Reporting Bugs
|
|
71
|
|
72 report bugs to the Bioperl bug tracking system to help us keep track
|
|
73 the bugs and their resolution. Bug reports can be submitted via
|
|
74 email or the web:
|
|
75
|
|
76 bioperl-bugs@bio.perl.org
|
|
77 http://bugzilla.bioperl.org/
|
|
78
|
|
79 =head1 AUTHOR - Heikki Lehvaslaiho
|
|
80
|
|
81 Email: heikki@ebi.ac.uk
|
|
82 Address:
|
|
83
|
|
84 EMBL Outstation, European Bioinformatics Institute
|
|
85 Wellcome Trust Genome Campus, Hinxton
|
|
86 Cambs. CB10 1SD, United Kingdom
|
|
87
|
|
88
|
|
89 =head1 APPENDIX
|
|
90
|
|
91 The rest of the documentation details each of the object
|
|
92 methods. Internal methods are usually preceded with a _
|
|
93
|
|
94 =cut
|
|
95
|
|
96
|
|
97 # Let the code begin...
|
|
98
|
|
99
|
|
100 package Bio::Variation::VariantI;
|
|
101 $VERSION=1.0;
|
|
102 use vars qw(@ISA);
|
|
103 use strict;
|
|
104 use Bio::Root::Root;
|
|
105 use Bio::DBLinkContainerI;
|
|
106 # Object preamble - inheritance
|
|
107
|
|
108 use Bio::SeqFeature::Generic;
|
|
109 @ISA = qw(Bio::Root::Root Bio::SeqFeature::Generic Bio::DBLinkContainerI );
|
|
110
|
|
111 =head2 id
|
|
112
|
|
113 Title : id
|
|
114 Usage : $obj->id
|
|
115 Function:
|
|
116
|
|
117 Read only method. Returns the id of the variation object.
|
|
118 The id is the id of the first DBLink object attached to this object.
|
|
119
|
|
120 Example :
|
|
121 Returns : scalar
|
|
122 Args : none
|
|
123
|
|
124 =cut
|
|
125
|
|
126 sub id {
|
|
127 my ($self) = @_;
|
|
128 my @ids = $self->each_DBLink;
|
|
129 my $id = $ids[0] if scalar @ids > 0;
|
|
130 return $id->database. "::". $id->primary_id if $id;
|
|
131 }
|
|
132
|
|
133
|
|
134 =head2 add_Allele
|
|
135
|
|
136 Title : add_Allele
|
|
137 Usage : $self->add_Allele($allele)
|
|
138 Function:
|
|
139
|
|
140 Adds one Bio::Variation::Allele into the list of alleles.
|
|
141 Note that the method forces the convention that nucleotide
|
|
142 sequence is in lower case and amino acds are in upper
|
|
143 case.
|
|
144
|
|
145 Example :
|
|
146 Returns : 1 when succeeds, 0 for failure.
|
|
147 Args : Allele object
|
|
148
|
|
149 =cut
|
|
150
|
|
151
|
|
152 sub add_Allele {
|
|
153 my ($self,$value) = @_;
|
|
154 if (defined $value) {
|
|
155 if( ! $value->isa('Bio::Variation::Allele') ) {
|
|
156 my $com = ref $value;
|
|
157 $self->throw("Is not a Allele object but a [$com]");
|
|
158 return 0;
|
|
159 } else {
|
|
160 if ( $self->isa('Bio::Variation::AAChange') ) {
|
|
161 $value->seq( uc $value->seq) if $value->seq;
|
|
162 } else {
|
|
163 $value->seq( lc $value->seq) if $value->seq;
|
|
164 }
|
|
165 push(@{$self->{'alleles'}},$value);
|
|
166 $self->allele_mut($value); #????
|
|
167 return 1;
|
|
168 }
|
|
169 } else {
|
|
170 return 0;
|
|
171 }
|
|
172 }
|
|
173
|
|
174
|
|
175 =head2 each_Allele
|
|
176
|
|
177 Title : alleles
|
|
178 Usage : $obj->each_Allele();
|
|
179 Function:
|
|
180
|
|
181 Returns a list of Bio::Variation::Allele objects
|
|
182
|
|
183 Example :
|
|
184 Returns : list of Alleles
|
|
185 Args : none
|
|
186
|
|
187 =cut
|
|
188
|
|
189 sub each_Allele{
|
|
190 my ($self,@args) = @_;
|
|
191 return @{$self->{'alleles'}};
|
|
192 }
|
|
193
|
|
194
|
|
195
|
|
196 =head2 isMutation
|
|
197
|
|
198 Title : isMutation
|
|
199 Usage : print join('/', $obj->each_Allele) if not $obj->isMutation;
|
|
200 Function:
|
|
201
|
|
202 Returns or sets the boolean value indicating that the
|
|
203 variant descibed is a canonical mutation with two alleles
|
|
204 assinged to be the original (wild type) allele and mutated
|
|
205 allele, respectively. If this value is not set, it is
|
|
206 assumed that the Variant descibes polymorphisms.
|
|
207
|
|
208 Returns : a boolean
|
|
209
|
|
210 =cut
|
|
211
|
|
212 sub isMutation {
|
|
213 my ($self,$value) = @_;
|
|
214 if (defined $value) {
|
|
215 if ($value ) {
|
|
216 $self->{'isMutation'} = 1;
|
|
217 } else {
|
|
218 $self->{'isMutation'} = 0;
|
|
219 }
|
|
220 }
|
|
221 return $self->{'isMutation'};
|
|
222 }
|
|
223
|
|
224
|
|
225 =head2 allele_ori
|
|
226
|
|
227 Title : allele_ori
|
|
228 Usage : $obj->allele_ori();
|
|
229 Function:
|
|
230
|
|
231 Links to and returns the Bio::Variation::Allele object.
|
|
232 If value is not set, returns false. All other Alleles are
|
|
233 compared to this.
|
|
234
|
|
235 Amino acid sequences are stored in upper case characters,
|
|
236 others in lower case.
|
|
237
|
|
238 Example :
|
|
239 Returns : string
|
|
240 Args : string
|
|
241
|
|
242 See L<Bio::Variation::Allele> for more.
|
|
243
|
|
244 =cut
|
|
245
|
|
246 sub allele_ori {
|
|
247 my ($self,$value) = @_;
|
|
248 if( defined $value) {
|
|
249 if ( ! ref $value || ! $value->isa('Bio::Variation::Allele')) {
|
|
250 $self->throw("Value is not Bio::Variation::Allele but [$value]");
|
|
251 } else {
|
|
252 if ( $self->isa('Bio::Variation::AAChange') ) {
|
|
253 $value->seq( uc $value->seq) if $value->seq;
|
|
254 } else {
|
|
255 $value->seq( lc $value->seq) if $value->seq;
|
|
256 }
|
|
257 $self->{'allele_ori'} = $value;
|
|
258 }
|
|
259 }
|
|
260 return $self->{'allele_ori'};
|
|
261 }
|
|
262
|
|
263
|
|
264 =head2 allele_mut
|
|
265
|
|
266 Title : allele_mut
|
|
267 Usage : $obj->allele_mut();
|
|
268 Function:
|
|
269
|
|
270 Links to and returns the Bio::Variation::Allele
|
|
271 object. Sets and returns the mutated allele sequence.
|
|
272 If value is not set, returns false.
|
|
273
|
|
274 Amino acid sequences are stored in upper case characters,
|
|
275 others in lower case.
|
|
276
|
|
277 Example :
|
|
278 Returns : string
|
|
279 Args : string
|
|
280
|
|
281 See L<Bio::Variation::Allele> for more.
|
|
282
|
|
283 =cut
|
|
284
|
|
285
|
|
286 sub allele_mut {
|
|
287 my ($self,$value) = @_;
|
|
288 if( defined $value) {
|
|
289 if ( ! ref $value || ! $value->isa('Bio::Variation::Allele')) {
|
|
290 $self->throw("Value is not Bio::Variation::Allele but [$value]");
|
|
291 } else {
|
|
292 if ( $self->isa('Bio::Variation::AAChange') ) {
|
|
293 $value->seq( uc $value->seq) if $value->seq;
|
|
294 } else {
|
|
295 $value->seq( lc $value->seq) if $value->seq;
|
|
296 }
|
|
297 $self->{'allele_mut'} = $value;
|
|
298 }
|
|
299 }
|
|
300 return $self->{'allele_mut'};
|
|
301 }
|
|
302
|
|
303 =head2 length
|
|
304
|
|
305 Title : length
|
|
306 Usage : $obj->length();
|
|
307 Function:
|
|
308
|
|
309 Sets and returns the length of the affected original
|
|
310 allele sequence. If value is not set, returns false == 0.
|
|
311
|
|
312 Value 0 means that the variant position is before the
|
|
313 start=end sequence position. (Value 1 would denote a point
|
|
314 mutation). This follows the convension to report an
|
|
315 insertion (2insT) in equivalent way to a corresponding
|
|
316 deletion (2delT) (Think about indel polymorpism ATC <=> AC
|
|
317 where the origianal state is not known ).
|
|
318
|
|
319 Example :
|
|
320 Returns : string
|
|
321 Args : string
|
|
322
|
|
323 =cut
|
|
324
|
|
325
|
|
326 sub length {
|
|
327 my ($self,$value) = @_;
|
|
328 if ( defined $value) {
|
|
329 $self->{'length'} = $value;
|
|
330 }
|
|
331 if ( ! exists $self->{'length'} ) {
|
|
332 return 0;
|
|
333 }
|
|
334 return $self->{'length'};
|
|
335 }
|
|
336
|
|
337 =head2 upStreamSeq
|
|
338
|
|
339 Title : upStreamSeq
|
|
340 Usage : $obj->upStreamSeq();
|
|
341 Function:
|
|
342
|
|
343 Sets and returns upstream flanking sequence string. If
|
|
344 value is not set, returns false. The sequence should be
|
|
345 >=25 characters long, if possible.
|
|
346
|
|
347 Example :
|
|
348 Returns : string or false
|
|
349 Args : string
|
|
350
|
|
351 =cut
|
|
352
|
|
353
|
|
354 sub upStreamSeq {
|
|
355 my ($self,$value) = @_;
|
|
356 if( defined $value) {
|
|
357 $self->{'upstreamseq'} = $value;
|
|
358 }
|
|
359 return $self->{'upstreamseq'};
|
|
360 }
|
|
361
|
|
362
|
|
363 =head2 dnStreamSeq
|
|
364
|
|
365 Title : dnStreamSeq
|
|
366 Usage : $obj->dnStreamSeq();
|
|
367 Function:
|
|
368
|
|
369 Sets and returns dnstream flanking sequence string. If
|
|
370 value is not set, returns false. The sequence should be
|
|
371 >=25 characters long, if possible.
|
|
372
|
|
373 Example :
|
|
374 Returns : string or false
|
|
375 Args : string
|
|
376
|
|
377 =cut
|
|
378
|
|
379
|
|
380 sub dnStreamSeq {
|
|
381 my ($self,$value) = @_;
|
|
382 if( defined $value) {
|
|
383 $self->{'dnstreamseq'} = $value;
|
|
384 }
|
|
385 return $self->{'dnstreamseq'};
|
|
386
|
|
387 }
|
|
388
|
|
389
|
|
390 =head2 label
|
|
391
|
|
392 Title : label
|
|
393 Usage : $obj->label();
|
|
394 Function:
|
|
395
|
|
396 Sets and returns mutation event label(s). If value is not
|
|
397 set, or no argument is given returns false. Each
|
|
398 instantiable class needs to implement this method. Valid
|
|
399 values are listed in 'Mutation event controlled vocabulary' in
|
|
400 http://www.ebi.ac.uk/mutations/recommendations/mutevent.html.
|
|
401
|
|
402 Example :
|
|
403 Returns : string
|
|
404 Args : string
|
|
405
|
|
406 =cut
|
|
407
|
|
408
|
|
409 sub label {
|
|
410 my ($self,$value) = @_;
|
|
411 $self->throw("[$self] has not implemeted method 'label'");
|
|
412 }
|
|
413
|
|
414
|
|
415
|
|
416 =head2 status
|
|
417
|
|
418 Title : status
|
|
419 Usage : $obj->status()
|
|
420 Function:
|
|
421
|
|
422 Returns the status of the sequence change object.
|
|
423 Valid values are: 'suspected' and 'proven'
|
|
424
|
|
425 Example : $obj->status('proven');
|
|
426 Returns : scalar
|
|
427 Args : valid string (optional, for setting)
|
|
428
|
|
429
|
|
430 =cut
|
|
431
|
|
432
|
|
433 sub status {
|
|
434 my ($self,$value) = @_;
|
|
435 my %status = (suspected => 1,
|
|
436 proven => 1
|
|
437 );
|
|
438
|
|
439 if( defined $value) {
|
|
440 $value = lc $value;
|
|
441 if ($status{$value}) {
|
|
442 $self->{'status'} = $value;
|
|
443 }
|
|
444 else {
|
|
445 $self->throw("$value is not valid status value!");
|
|
446 }
|
|
447 }
|
|
448 if( ! exists $self->{'status'} ) {
|
|
449 return "$self";
|
|
450 }
|
|
451 return $self->{'status'};
|
|
452 }
|
|
453
|
|
454
|
|
455 =head2 proof
|
|
456
|
|
457 Title : proof
|
|
458 Usage : $obj->proof()
|
|
459 Function:
|
|
460
|
|
461 Returns the proof of the sequence change object.
|
|
462 Valid values are: 'computed' and 'experimental'.
|
|
463
|
|
464 Example : $obj->proof('computed');
|
|
465 Returns : scalar
|
|
466 Args : valid string (optional, for setting)
|
|
467
|
|
468
|
|
469 =cut
|
|
470
|
|
471
|
|
472 sub proof {
|
|
473 my ($self,$value) = @_;
|
|
474 my %proof = (computed => 1,
|
|
475 experimental => 1
|
|
476 );
|
|
477
|
|
478 if( defined $value) {
|
|
479 $value = lc $value;
|
|
480 if ($proof{$value}) {
|
|
481 $self->{'proof'} = $value;
|
|
482 } else {
|
|
483 $self->throw("$value is not valid proof value!");
|
|
484 }
|
|
485 }
|
|
486 return $self->{'proof'};
|
|
487 }
|
|
488
|
|
489
|
|
490 =head2 region
|
|
491
|
|
492 Title : region
|
|
493 Usage : $obj->region();
|
|
494 Function:
|
|
495
|
|
496 Sets and returns the name of the sequence region type or
|
|
497 protein domain at this location. If value is not set,
|
|
498 returns false.
|
|
499
|
|
500 Example :
|
|
501 Returns : string
|
|
502 Args : string
|
|
503
|
|
504 =cut
|
|
505
|
|
506
|
|
507 sub region {
|
|
508 my ($self,$value) = @_;
|
|
509 if( defined $value) {
|
|
510 $self->{'region'} = $value;
|
|
511 }
|
|
512 return $self->{'region'};
|
|
513 }
|
|
514
|
|
515
|
|
516 =head2 region_value
|
|
517
|
|
518 Title : region_value
|
|
519 Usage : $obj->region_value();
|
|
520 Function:
|
|
521
|
|
522 Sets and returns the name of the sequence region_value or
|
|
523 protein domain at this location. If value is not set,
|
|
524 returns false.
|
|
525
|
|
526 Example :
|
|
527 Returns : string
|
|
528 Args : string
|
|
529
|
|
530 =cut
|
|
531
|
|
532
|
|
533 sub region_value {
|
|
534 my ($self,$value) = @_;
|
|
535 if( defined $value) {
|
|
536 $self->{'region_value'} = $value;
|
|
537 }
|
|
538 return $self->{'region_value'};
|
|
539 }
|
|
540
|
|
541 =head2 region_dist
|
|
542
|
|
543 Title : region_dist
|
|
544 Usage : $obj->region_dist();
|
|
545 Function:
|
|
546
|
|
547 Sets and returns the distance tot the closest region
|
|
548 (i.e. intro/exon or domain) boundary. If distance is not
|
|
549 set, returns false.
|
|
550
|
|
551 Example :
|
|
552 Returns : integer
|
|
553 Args : integer
|
|
554
|
|
555 =cut
|
|
556
|
|
557
|
|
558 sub region_dist {
|
|
559 my ($self,$value) = @_;
|
|
560 if( defined $value) {
|
|
561 if ( not $value =~ /^[+-]?\d+$/ ) {
|
|
562 $self->throw("[$value] for region_dist has to be an integer\n");
|
|
563 } else {
|
|
564 $self->{'region_dist'} = $value;
|
|
565 }
|
|
566 }
|
|
567 return $self->{'region_dist'};
|
|
568 }
|
|
569
|
|
570
|
|
571 =head2 numbering
|
|
572
|
|
573 Title : numbering
|
|
574 Usage : $obj->numbering()
|
|
575 Function:
|
|
576
|
|
577 Returns the numbering chema used locating sequnce features.
|
|
578 Valid values are: 'entry' and 'coding'
|
|
579
|
|
580 Example : $obj->numbering('coding');
|
|
581 Returns : scalar
|
|
582 Args : valid string (optional, for setting)
|
|
583
|
|
584
|
|
585 =cut
|
|
586
|
|
587
|
|
588 sub numbering {
|
|
589 my ($self,$value) = @_;
|
|
590 my %numbering = (entry => 1,
|
|
591 coding => 1
|
|
592 );
|
|
593
|
|
594 if( defined $value) {
|
|
595 $value = lc $value;
|
|
596 if ($numbering{$value}) {
|
|
597 $self->{'numbering'} = $value;
|
|
598 }
|
|
599 else {
|
|
600 $self->throw("'$value' is not a valid for numbering!");
|
|
601 }
|
|
602 }
|
|
603 if( ! exists $self->{'numbering'} ) {
|
|
604 return "$self";
|
|
605 }
|
|
606 return $self->{'numbering'};
|
|
607 }
|
|
608
|
|
609 =head2 mut_number
|
|
610
|
|
611 Title : mut_number
|
|
612 Usage : $num = $obj->mut_number;
|
|
613 : $num = $obj->mut_number($number);
|
|
614 Function:
|
|
615
|
|
616 Returns or sets the number identifying the order in which the
|
|
617 mutation has been issued. Numbers shouldstart from 1.
|
|
618 If the number has never been set, the method will return ''
|
|
619
|
|
620 If you want the output from IO modules look nice and, for
|
|
621 multivariant/allele variations, make sense you better set
|
|
622 this attribute.
|
|
623
|
|
624 Returns : an integer
|
|
625
|
|
626 =cut
|
|
627
|
|
628
|
|
629 sub mut_number {
|
|
630 my ($self,$value) = @_;
|
|
631 if (defined $value) {
|
|
632 $self->{'mut_number'} = $value;
|
|
633 }
|
|
634 unless (exists $self->{'mut_number'}) {
|
|
635 return ('');
|
|
636 } else {
|
|
637 return $self->{'mut_number'};
|
|
638 }
|
|
639 }
|
|
640
|
|
641
|
|
642 =head2 SeqDiff
|
|
643
|
|
644 Title : SeqDiff
|
|
645 Usage : $mutobj = $obj->SeqDiff;
|
|
646 : $mutobj = $obj->SeqDiff($objref);
|
|
647 Function:
|
|
648
|
|
649 Returns or sets the link-reference to the umbrella
|
|
650 Bio::Variation::SeqDiff object. If there is no link,
|
|
651 it will return undef
|
|
652
|
|
653 Note: Adding a variant into a SeqDiff object will
|
|
654 automatically set this value.
|
|
655
|
|
656 Returns : an obj_ref or undef
|
|
657
|
|
658 See L<Bio::Variation::SeqDiff> for more information.
|
|
659
|
|
660 =cut
|
|
661
|
|
662 sub SeqDiff {
|
|
663 my ($self,$value) = @_;
|
|
664 if (defined $value) {
|
|
665 if( ! $value->isa('Bio::Variation::SeqDiff') ) {
|
|
666 $self->throw("Is not a Bio::Variation::SeqDiff object but a [$value]");
|
|
667 return (undef);
|
|
668 }
|
|
669 else {
|
|
670 $self->{'seqDiff'} = $value;
|
|
671 }
|
|
672 }
|
|
673 unless (exists $self->{'seqDiff'}) {
|
|
674 return (undef);
|
|
675 } else {
|
|
676 return $self->{'seqDiff'};
|
|
677 }
|
|
678 }
|
|
679
|
|
680 =head2 add_DBLink
|
|
681
|
|
682 Title : add_DBLink
|
|
683 Usage : $self->add_DBLink($ref)
|
|
684 Function: adds a link object
|
|
685 Example :
|
|
686 Returns :
|
|
687 Args :
|
|
688
|
|
689
|
|
690 =cut
|
|
691
|
|
692
|
|
693 sub add_DBLink{
|
|
694 my ($self,$com) = @_;
|
|
695 if( $com && ! $com->isa('Bio::Annotation::DBLink') ) {
|
|
696 $self->throw("Is not a link object but a [$com]");
|
|
697 }
|
|
698 $com && push(@{$self->{'link'}},$com);
|
|
699 }
|
|
700
|
|
701 =head2 each_DBLink
|
|
702
|
|
703 Title : each_DBLink
|
|
704 Usage : foreach $ref ( $self->each_DBlink() )
|
|
705 Function: gets an array of DBlink of objects
|
|
706 Example :
|
|
707 Returns :
|
|
708 Args :
|
|
709
|
|
710
|
|
711 =cut
|
|
712
|
|
713 sub each_DBLink{
|
|
714 my ($self) = @_;
|
|
715
|
|
716 return @{$self->{'link'}};
|
|
717 }
|
|
718
|
|
719 =head2 restriction_changes
|
|
720
|
|
721 Title : restriction_changes
|
|
722 Usage : $obj->restriction_changes();
|
|
723 Function:
|
|
724
|
|
725 Returns a string containing a list of restriction
|
|
726 enzyme changes of form +EcoRI, separated by
|
|
727 commas. Strings need to be valid restriction enzyme names
|
|
728 as stored in REBASE. allele_ori and allele_mut need to be assigned.
|
|
729
|
|
730 Example :
|
|
731 Returns : string
|
|
732 Args : string
|
|
733
|
|
734 =cut
|
|
735
|
|
736 sub restriction_changes {
|
|
737 my ($self) = @_;
|
|
738
|
|
739 if (not $self->{'re_changes'}) {
|
|
740 my %re = &_enzymes;
|
|
741
|
|
742 # complain if used on AA data
|
|
743 if ($self->isa('Bio::Variation::AAChange')) {
|
|
744 $self->throw('Restriction enzymes do not bite polypeptides!');
|
|
745 }
|
|
746
|
|
747 #sanity checks
|
|
748 $self->warn('Upstream sequence is empty!')
|
|
749 if $self->upStreamSeq eq '';
|
|
750 $self->warn('Downstream sequence is empty!')
|
|
751 if $self->dnStreamSeq eq '';
|
|
752 # $self->warn('Original allele sequence is empty!')
|
|
753 # if $self->allele_ori eq '';
|
|
754 # $self->warn('Mutated allele sequence is empty!')
|
|
755 # if $self->allele_mut eq '';
|
|
756
|
|
757 #reuse the non empty DNA level list at RNA level if the flanks are identical
|
|
758 #Hint: Check DNAMutation object first
|
|
759 if ($self->isa('Bio::Variation::RNAChange') and $self->DNAMutation and
|
|
760 $self->upStreamSeq eq $self->DNAMutation->upStreamSeq and
|
|
761 $self->dnStreamSeq eq $self->DNAMutation->dnStreamSeq and
|
|
762 $self->DNAMutation->restriction_changes ne '' ) {
|
|
763 $self->{'re_changes'} = $self->DNAMutation->restriction_changes;
|
|
764 } else {
|
|
765
|
|
766 #maximum length of a type II restriction site in the current REBASE
|
|
767 my ($le_dn) = 15;
|
|
768 my ($le_up) = $le_dn;
|
|
769
|
|
770 #reduce the flank lengths if the desired length is not available
|
|
771 $le_dn = CORE::length ($self->dnStreamSeq) if $le_dn > CORE::length ($self->dnStreamSeq);
|
|
772 $le_up = CORE::length ($self->upStreamSeq) if $le_up > CORE::length ($self->upStreamSeq);
|
|
773
|
|
774 #Build sequence strings to compare
|
|
775 my ($oriseq, $mutseq);
|
|
776 $oriseq = $mutseq = substr($self->upStreamSeq, -$le_up, $le_up);
|
|
777 $oriseq .= $self->allele_ori->seq if $self->allele_ori->seq;
|
|
778 $mutseq .= $self->allele_mut->seq if $self->allele_mut->seq;
|
|
779 $oriseq .= substr($self->dnStreamSeq, 0, $le_dn);
|
|
780 $mutseq .= substr($self->dnStreamSeq, 0, $le_dn);
|
|
781
|
|
782 # ... and their reverse complements
|
|
783 my $oriseq_rev = _revcompl ($oriseq);
|
|
784 my $mutseq_rev = _revcompl ($mutseq);
|
|
785
|
|
786 # collect results into a string
|
|
787 my $rec = '';
|
|
788 foreach my $enz (sort keys (%re)) {
|
|
789 my $site = $re{$enz};
|
|
790 my @ori = ($oriseq=~ /$site/g);
|
|
791 my @mut = ($mutseq=~ /$site/g);
|
|
792 my @ori_r = ($oriseq_rev =~ /$site/g);
|
|
793 my @mut_r = ($mutseq_rev =~ /$site/g);
|
|
794
|
|
795 $rec .= '+'. $enz. ", "
|
|
796 if (scalar @ori < scalar @mut) or (scalar @ori_r < scalar @mut_r);
|
|
797 $rec .= '-'. $enz. ", "
|
|
798 if (scalar @ori > scalar @mut) or (scalar @ori_r > scalar @mut_r);
|
|
799
|
|
800 }
|
|
801 $rec = substr($rec, 0, CORE::length($rec) - 2) if $rec ne '';
|
|
802 $self->{'re_changes'} = $rec;
|
|
803 }
|
|
804 }
|
|
805 return $self->{'re_changes'}
|
|
806 }
|
|
807
|
|
808
|
|
809 sub _revcompl {
|
|
810 # side effect: lower case letters
|
|
811 my ($seq) = shift;
|
|
812
|
|
813 $seq = lc $seq;
|
|
814 $seq =~ tr/acgtrymkswhbvdnx/tgcayrkmswdvbhnx/;
|
|
815 return CORE::reverse $seq;
|
|
816 }
|
|
817
|
|
818
|
|
819 sub _enzymes {
|
|
820 #REBASE version 005 type2.005
|
|
821 my %enzymes = (
|
|
822 'AarI' => 'cacctgc',
|
|
823 'AatII' => 'gacgtc',
|
|
824 'AccI' => 'gt[ac][gt]ac',
|
|
825 'AceIII' => 'cagctc',
|
|
826 'AciI' => 'ccgc',
|
|
827 'AclI' => 'aacgtt',
|
|
828 'AcyI' => 'g[ag]cg[ct]c',
|
|
829 'AflII' => 'cttaag',
|
|
830 'AflIII' => 'ac[ag][ct]gt',
|
|
831 'AgeI' => 'accggt',
|
|
832 'AhaIII' => 'tttaaa',
|
|
833 'AloI' => 'gaac[acgt][acgt][acgt][acgt][acgt][acgt]tcc',
|
|
834 'AluI' => 'agct',
|
|
835 'AlwNI' => 'cag[acgt][acgt][acgt]ctg',
|
|
836 'ApaBI' => 'gca[acgt][acgt][acgt][acgt][acgt]tgc',
|
|
837 'ApaI' => 'gggccc',
|
|
838 'ApaLI' => 'gtgcac',
|
|
839 'ApoI' => '[ag]aatt[ct]',
|
|
840 'AscI' => 'ggcgcgcc',
|
|
841 'AsuI' => 'gg[acgt]cc',
|
|
842 'AsuII' => 'ttcgaa',
|
|
843 'AvaI' => 'c[ct]cg[ag]g',
|
|
844 'AvaII' => 'gg[at]cc',
|
|
845 'AvaIII' => 'atgcat',
|
|
846 'AvrII' => 'cctagg',
|
|
847 'BaeI' => 'ac[acgt][acgt][acgt][acgt]gta[ct]c',
|
|
848 'BalI' => 'tggcca',
|
|
849 'BamHI' => 'ggatcc',
|
|
850 'BbvCI' => 'cctcagc',
|
|
851 'BbvI' => 'gcagc',
|
|
852 'BbvII' => 'gaagac',
|
|
853 'BccI' => 'ccatc',
|
|
854 'Bce83I' => 'cttgag',
|
|
855 'BcefI' => 'acggc',
|
|
856 'BcgI' => 'cga[acgt][acgt][acgt][acgt][acgt][acgt]tgc',
|
|
857 'BciVI' => 'gtatcc',
|
|
858 'BclI' => 'tgatca',
|
|
859 'BetI' => '[at]ccgg[at]',
|
|
860 'BfiI' => 'actggg',
|
|
861 'BglI' => 'gcc[acgt][acgt][acgt][acgt][acgt]ggc',
|
|
862 'BglII' => 'agatct',
|
|
863 'BinI' => 'ggatc',
|
|
864 'BmgI' => 'g[gt]gccc',
|
|
865 'BplI' => 'gag[acgt][acgt][acgt][acgt][acgt]ctc',
|
|
866 'Bpu10I' => 'cct[acgt]agc',
|
|
867 'BsaAI' => '[ct]acgt[ag]',
|
|
868 'BsaBI' => 'gat[acgt][acgt][acgt][acgt]atc',
|
|
869 'BsaXI' => 'ac[acgt][acgt][acgt][acgt][acgt]ctcc',
|
|
870 'BsbI' => 'caacac',
|
|
871 'BscGI' => 'cccgt',
|
|
872 'BseMII' => 'ctcag',
|
|
873 'BsePI' => 'gcgcgc',
|
|
874 'BseRI' => 'gaggag',
|
|
875 'BseSI' => 'g[gt]gc[ac]c',
|
|
876 'BsgI' => 'gtgcag',
|
|
877 'BsiI' => 'cacgag',
|
|
878 'BsiYI' => 'cc[acgt][acgt][acgt][acgt][acgt][acgt][acgt]gg',
|
|
879 'BsmAI' => 'gtctc',
|
|
880 'BsmI' => 'gaatgc',
|
|
881 'Bsp1407I' => 'tgtaca',
|
|
882 'Bsp24I' => 'gac[acgt][acgt][acgt][acgt][acgt][acgt]tgg',
|
|
883 'BspGI' => 'ctggac',
|
|
884 'BspHI' => 'tcatga',
|
|
885 'BspLU11I' => 'acatgt',
|
|
886 'BspMI' => 'acctgc',
|
|
887 'BspMII' => 'tccgga',
|
|
888 'BsrBI' => 'ccgctc',
|
|
889 'BsrDI' => 'gcaatg',
|
|
890 'BsrI' => 'actgg',
|
|
891 'BstEII' => 'ggt[acgt]acc',
|
|
892 'BstXI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt]tgg',
|
|
893 'BtrI' => 'cacgtc',
|
|
894 'BtsI' => 'gcagtg',
|
|
895 'Cac8I' => 'gc[acgt][acgt]gc',
|
|
896 'CauII' => 'cc[cg]gg',
|
|
897 'Cfr10I' => '[ag]ccgg[ct]',
|
|
898 'CfrI' => '[ct]ggcc[ag]',
|
|
899 'CjeI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt]gt',
|
|
900 'CjePI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt][acgt]tc',
|
|
901 'ClaI' => 'atcgat',
|
|
902 'CviJI' => '[ag]gc[ct]',
|
|
903 'CviRI' => 'tgca',
|
|
904 'DdeI' => 'ct[acgt]ag',
|
|
905 'DpnI' => 'gatc',
|
|
906 'DraII' => '[ag]gg[acgt]cc[ct]',
|
|
907 'DraIII' => 'cac[acgt][acgt][acgt]gtg',
|
|
908 'DrdI' => 'gac[acgt][acgt][acgt][acgt][acgt][acgt]gtc',
|
|
909 'DrdII' => 'gaacca',
|
|
910 'DsaI' => 'cc[ag][ct]gg',
|
|
911 'Eam1105I' => 'gac[acgt][acgt][acgt][acgt][acgt]gtc',
|
|
912 'EciI' => 'ggcgga',
|
|
913 'Eco31I' => 'ggtctc',
|
|
914 'Eco47III' => 'agcgct',
|
|
915 'Eco57I' => 'ctgaag',
|
|
916 'EcoNI' => 'cct[acgt][acgt][acgt][acgt][acgt]agg',
|
|
917 'EcoRI' => 'gaattc',
|
|
918 'EcoRII' => 'cc[at]gg',
|
|
919 'EcoRV' => 'gatatc',
|
|
920 'Esp3I' => 'cgtctc',
|
|
921 'EspI' => 'gct[acgt]agc',
|
|
922 'FauI' => 'cccgc',
|
|
923 'FinI' => 'gggac',
|
|
924 'Fnu4HI' => 'gc[acgt]gc',
|
|
925 'FnuDII' => 'cgcg',
|
|
926 'FokI' => 'ggatg',
|
|
927 'FseI' => 'ggccggcc',
|
|
928 'GdiII' => 'cggcc[ag]',
|
|
929 'GsuI' => 'ctggag',
|
|
930 'HaeI' => '[at]ggcc[at]',
|
|
931 'HaeII' => '[ag]gcgc[ct]',
|
|
932 'HaeIII' => 'ggcc',
|
|
933 'HaeIV' => 'ga[ct][acgt][acgt][acgt][acgt][acgt][ag]tc',
|
|
934 'HgaI' => 'gacgc',
|
|
935 'HgiAI' => 'g[at]gc[at]c',
|
|
936 'HgiCI' => 'gg[ct][ag]cc',
|
|
937 'HgiEII' => 'acc[acgt][acgt][acgt][acgt][acgt][acgt]ggt',
|
|
938 'HgiJII' => 'g[ag]gc[ct]c',
|
|
939 'HhaI' => 'gcgc',
|
|
940 'Hin4I' => 'ga[cgt][acgt][acgt][acgt][acgt][acgt][acg]tc',
|
|
941 'HindII' => 'gt[ct][ag]ac',
|
|
942 'HindIII' => 'aagctt',
|
|
943 'HinfI' => 'ga[acgt]tc',
|
|
944 'HpaI' => 'gttaac',
|
|
945 'HpaII' => 'ccgg',
|
|
946 'HphI' => 'ggtga',
|
|
947 'Hpy178III' => 'tc[acgt][acgt]ga',
|
|
948 'Hpy188I' => 'tc[acgt]ga',
|
|
949 'Hpy99I' => 'cg[at]cg',
|
|
950 'KpnI' => 'ggtacc',
|
|
951 'Ksp632I' => 'ctcttc',
|
|
952 'MaeI' => 'ctag',
|
|
953 'MaeII' => 'acgt',
|
|
954 'MaeIII' => 'gt[acgt]ac',
|
|
955 'MboI' => 'gatc',
|
|
956 'MboII' => 'gaaga',
|
|
957 'McrI' => 'cg[ag][ct]cg',
|
|
958 'MfeI' => 'caattg',
|
|
959 'MjaIV' => 'gt[acgt][acgt]ac',
|
|
960 'MluI' => 'acgcgt',
|
|
961 'MmeI' => 'tcc[ag]ac',
|
|
962 'MnlI' => 'cctc',
|
|
963 'MseI' => 'ttaa',
|
|
964 'MslI' => 'ca[ct][acgt][acgt][acgt][acgt][ag]tg',
|
|
965 'MstI' => 'tgcgca',
|
|
966 'MwoI' => 'gc[acgt][acgt][acgt][acgt][acgt][acgt][acgt]gc',
|
|
967 'NaeI' => 'gccggc',
|
|
968 'NarI' => 'ggcgcc',
|
|
969 'NcoI' => 'ccatgg',
|
|
970 'NdeI' => 'catatg',
|
|
971 'NheI' => 'gctagc',
|
|
972 'NlaIII' => 'catg',
|
|
973 'NlaIV' => 'gg[acgt][acgt]cc',
|
|
974 'NotI' => 'gcggccgc',
|
|
975 'NruI' => 'tcgcga',
|
|
976 'NspBII' => 'c[ac]gc[gt]g',
|
|
977 'NspI' => '[ag]catg[ct]',
|
|
978 'PacI' => 'ttaattaa',
|
|
979 'Pfl1108I' => 'tcgtag',
|
|
980 'PflMI' => 'cca[acgt][acgt][acgt][acgt][acgt]tgg',
|
|
981 'PleI' => 'gagtc',
|
|
982 'PmaCI' => 'cacgtg',
|
|
983 'PmeI' => 'gtttaaac',
|
|
984 'PpiI' => 'gaac[acgt][acgt][acgt][acgt][acgt]ctc',
|
|
985 'PpuMI' => '[ag]gg[at]cc[ct]',
|
|
986 'PshAI' => 'gac[acgt][acgt][acgt][acgt]gtc',
|
|
987 'PsiI' => 'ttataa',
|
|
988 'PstI' => 'ctgcag',
|
|
989 'PvuI' => 'cgatcg',
|
|
990 'PvuII' => 'cagctg',
|
|
991 'RleAI' => 'cccaca',
|
|
992 'RsaI' => 'gtac',
|
|
993 'RsrII' => 'cgg[at]ccg',
|
|
994 'SacI' => 'gagctc',
|
|
995 'SacII' => 'ccgcgg',
|
|
996 'SalI' => 'gtcgac',
|
|
997 'SanDI' => 'ggg[at]ccc',
|
|
998 'SapI' => 'gctcttc',
|
|
999 'SauI' => 'cct[acgt]agg',
|
|
1000 'ScaI' => 'agtact',
|
|
1001 'ScrFI' => 'cc[acgt]gg',
|
|
1002 'SduI' => 'g[agt]gc[act]c',
|
|
1003 'SecI' => 'cc[acgt][acgt]gg',
|
|
1004 'SexAI' => 'acc[at]ggt',
|
|
1005 'SfaNI' => 'gcatc',
|
|
1006 'SfeI' => 'ct[ag][ct]ag',
|
|
1007 'SfiI' => 'ggcc[acgt][acgt][acgt][acgt][acgt]ggcc',
|
|
1008 'SgfI' => 'gcgatcgc',
|
|
1009 'SgrAI' => 'c[ag]ccgg[ct]g',
|
|
1010 'SimI' => 'gggtc',
|
|
1011 'SmaI' => 'cccggg',
|
|
1012 'SmlI' => 'ct[ct][ag]ag',
|
|
1013 'SnaBI' => 'tacgta',
|
|
1014 'SnaI' => 'gtatac',
|
|
1015 'SpeI' => 'actagt',
|
|
1016 'SphI' => 'gcatgc',
|
|
1017 'SplI' => 'cgtacg',
|
|
1018 'SrfI' => 'gcccgggc',
|
|
1019 'Sse232I' => 'cgccggcg',
|
|
1020 'Sse8387I' => 'cctgcagg',
|
|
1021 'Sse8647I' => 'agg[at]cct',
|
|
1022 'SspI' => 'aatatt',
|
|
1023 'Sth132I' => 'cccg',
|
|
1024 'StuI' => 'aggcct',
|
|
1025 'StyI' => 'cc[at][at]gg',
|
|
1026 'SwaI' => 'atttaaat',
|
|
1027 'TaqI' => 'tcga',
|
|
1028 'TaqII' => 'gaccga',
|
|
1029 'TatI' => '[at]gtac[at]',
|
|
1030 'TauI' => 'gc[cg]gc',
|
|
1031 'TfiI' => 'ga[at]tc',
|
|
1032 'TseI' => 'gc[at]gc',
|
|
1033 'Tsp45I' => 'gt[cg]ac',
|
|
1034 'Tsp4CI' => 'ac[acgt]gt',
|
|
1035 'TspEI' => 'aatt',
|
|
1036 'TspRI' => 'ca[cg]tg[acgt][acgt]',
|
|
1037 'Tth111I' => 'gac[acgt][acgt][acgt]gtc',
|
|
1038 'Tth111II' => 'caa[ag]ca',
|
|
1039 'UbaGI' => 'cac[acgt][acgt][acgt][acgt]gtg',
|
|
1040 'UbaPI' => 'cgaacg',
|
|
1041 'VspI' => 'attaat',
|
|
1042 'XbaI' => 'tctaga',
|
|
1043 'XcmI' => 'cca[acgt][acgt][acgt][acgt][acgt][acgt][acgt][acgt][acgt]tgg',
|
|
1044 'XhoI' => 'ctcgag',
|
|
1045 'XhoII' => '[ag]gatc[ct]',
|
|
1046 'XmaIII' => 'cggccg',
|
|
1047 'XmnI' => 'gaa[acgt][acgt][acgt][acgt]ttc'
|
|
1048 );
|
|
1049
|
|
1050 return %enzymes;
|
|
1051 }
|
|
1052
|
|
1053 1;
|