comparison variant_effect_predictor/Bio/LiveSeq/SeqI.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: SeqI.pm,v 1.25 2002/10/22 07:38:34 lapp Exp $
2 #
3 # bioperl module for Bio::LiveSeq::SeqI
4 #
5 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net>
6 #
7 # Copyright Joseph Insana
8 #
9 # You may distribute this module under the same terms as perl itself
10 #
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::LiveSeq::SeqI - Abstract sequence interface class for LiveSeq
16
17 =head1 SYNOPSIS
18
19 # documentation needed
20
21 =head1 DESCRIPTION
22
23 This class implements BioPerl PrimarySeqI interface for Live Seq objects.
24
25 One of the main difference in LiveSequence compared to traditional
26 "string" sequences is that coordinate systems are flexible. Typically
27 gene nucleotide numbering starts from 1 at the first character of the
28 initiator codon (A in ATG). This means that negative positions are
29 possible and common!
30
31 Secondly, the sequence manipulation methods do not return a new
32 sequence object but change the current object. The current status can
33 be written out to BioPerl sequence objects.
34
35 =head1 FEEDBACK
36
37 =head2 Mailing Lists
38
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to one
41 of the Bioperl mailing lists. Your participation is much appreciated.
42
43 bioperl-l@bioperl.org - General discussion
44 http://bio.perl.org/MailList.html - About the mailing lists
45
46 =head2 Reporting Bugs
47
48 Report bugs to the Bioperl bug tracking system to help us keep track
49 the bugs and their resolution. Bug reports can be submitted via email
50 or the web:
51
52 bioperl-bugs@bio.perl.org
53 http://bugzilla.bioperl.org/
54
55 =head1 AUTHOR - Joseph A.L. Insana
56
57 Email: Insana@ebi.ac.uk, jinsana@gmx.net
58
59 Address:
60
61 EMBL Outstation, European Bioinformatics Institute
62 Wellcome Trust Genome Campus, Hinxton
63 Cambs. CB10 1SD, United Kingdom
64
65 =head1 APPENDIX
66
67 The rest of the documentation details each of the object
68 methods. Internal methods are usually preceded with a _
69
70 Some note on the terminology/notation of method names:
71 label: a unique pointer to a single nucleotide
72 position: the position of a nucleotide according to a particular coordinate
73 system (e.g. counting downstream from a particular label taken as
74 number 1)
75 base: the one letter code for a nucleotide (i.e.: "a" "t" "c" "g")
76
77 a base is the "value" that an "element" of a "chain" can assume
78 (see documentation on the Chain datastructure if interested)
79
80 =cut
81
82 #'
83 # Let the code begin...
84
85 package Bio::LiveSeq::SeqI;
86 $VERSION=3.3;
87 # Version history:
88 # Thu Mar 16 18:11:18 GMT 2000 v.1.0 Started implementation, interface/inheritance from ChainI.pm
89 # Thu Mar 16 20:05:51 GMT 2000 v 1.2 implemented up to splice_out
90 # Fri Mar 17 05:37:37 GMT 2000 v 1.3 implemented lot of new methods and written their documentation / in sync with ChainI 1.6 and Chain 2.4
91 # Fri Mar 17 17:17:24 GMT 2000 v 1.7 in sync with ChainI 1.7
92 # Fri Mar 17 20:12:27 GMT 2000 v 1.8 NAMING change: index->label everywhere
93 # Mon Mar 20 19:19:21 GMT 2000 v 2.0 renamed from DNA to SeqI and begun
94 # working on methods defined with Heikki
95 # Tue Mar 21 01:37:52 GMT 2000 v 2.1 created strand(), seq()
96 # Tue Mar 21 02:43:21 GMT 2000 v 2.11 seq() prints correctly also for exons
97 # Wed Mar 22 19:41:45 GMT 2000 v 2.22 translate, alphabet, length, all_labels
98 # Thu Mar 23 21:03:42 GMT 2000 v 2.3 follows() label() position()
99 # Fri Mar 24 18:33:18 GMT 2000 v 2.33 rewritten position(), now works with diverse coordinate_starts
100 # Sat Mar 25 06:11:55 GMT 2000 v 2.4 started subseq
101 # Mon Mar 27 19:22:32 BST 2000 v 2.45 subseq should be ok but the thing about reverse strand has to be checked!!
102 # Tue Mar 28 01:53:31 BST 2000 v 2.46 changed strand behaviour in subseq
103 # Wed Mar 29 00:05:21 BST 2000 v 2.5 change() begun
104 # Wed Mar 29 02:06:20 BST 2000 v 2.53 _delete _mutate _praeinsert coded
105 # Wed Mar 29 02:29:01 BST 2000 v 2.531 _mutate changed to make it more general
106 # Wed Mar 29 03:38:21 BST 2000 v 2.54 tested and corrected change
107 # Wed Mar 29 16:23:39 BST 2000 v 2.55 change deals with complex now
108 # Fri Mar 31 18:26:54 BST 2000 v 2.56 translate_string added
109 # Sat Apr 1 19:02:28 BST 2000 v 2.57 labelchange() created
110 # Fri Apr 7 03:31:35 BST 2000 v 2.6 labelsubseq() created
111 # Sat Apr 8 13:01:09 BST 2000 v 2.61 obj_valid() created
112 # Wed Apr 12 16:23:21 BST 2000 v 2.7 _deletecheck call added in _delete
113 # Wed Apr 19 16:21:33 BST 2000 v 2.72 name() source() description() added
114 # Thu Apr 20 14:42:57 BST 2000 v 2.8 added or rewritten much pod documentation
115 # Thu Apr 27 16:18:55 BST 2000 v 2.82 translate now accounts for ttable info
116 # Thu Jun 22 20:02:39 BST 2000 v 2.9 valid() from Transcript now moved here, as the general for all objects inheriting from SeqI
117 # Thu Jun 22 20:17:32 BST 2000 v 2.91 _unsecure_labelsubseq() added
118 # Sat Jun 24 00:10:31 BST 2000 v 2.92 unsecure is an option of labelsubseq() now
119 # Thu Jun 29 16:38:45 BST 2000 v 3.0 labelchange() now calls itself again for the DNAobj if the label for the change is not valid for the object requested but valid for the DNAobj
120 # Tue Jan 30 14:16:22 EST 2001 v 3.1 delete_Obj added, to flush circular references
121 # Wed Mar 28 15:16:38 BST 2001 v 3.2 functions warn, verbose, throw, stack_trace, stack_trace_dump added
122 # Wed Apr 4 13:34:29 BST 2001 v 3.3 moved from carp to warn
123
124 use strict;
125 use vars qw($VERSION @ISA);
126 use Bio::LiveSeq::ChainI 1.9; # to inherit from it
127 use Bio::Tools::CodonTable; # for the translate() function
128 use Bio::PrimarySeqI;
129
130 @ISA=qw(Bio::Root::Root Bio::LiveSeq::ChainI Bio::PrimarySeqI ); # inherit from ChainI
131
132 =head2 seq
133
134 Title : seq
135 Usage : $string = $obj->seq()
136 Function: Returns the complete sequence of an object as a string of letters.
137 Suggested cases are upper case for proteins and lower case for
138 DNA sequence (IUPAC standard),
139 Returns : a string
140
141
142 =cut
143
144 sub seq {
145 my $self = shift;
146 my ($start,$end) = ($self->start(),$self->end());
147 if ($self->strand() == 1) {
148 return $self->{'seq'}->down_chain2string($start,undef,$end);
149 } else { # reverse strand
150 my $str = $self->{'seq'}->up_chain2string($start,undef,$end);
151 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
152 return $str;
153 }
154 }
155
156 =head2 all_labels
157
158 Title : all_labels
159 Usage : @labels = $obj->all_labels()
160 Function: all the labels of every nucleotide an object is composed of
161 Returns : an array of labels
162 Args : none
163
164 =cut
165
166 sub all_labels {
167 my $self = shift;
168 my ($start,$end) = ($self->start(),$self->end());
169 my $labels;
170 if ($self->strand() == 1) {
171 $labels=$self->{'seq'}->down_labels($start,$end);
172 } else {
173 $labels=$self->{'seq'}->up_labels($start,$end);
174 }
175 return (@{$labels});
176 }
177
178 =head2 labelsubseq
179
180 Title : labelsubseq
181 Usage : $dna->labelsubseq();
182 : $dna->labelsubseq($startlabel);
183 : $dna->labelsubseq($startlabel,$length);
184 : $dna->labelsubseq($startlabel,undef,$endlabel);
185 e.g. : $dna->labelsubseq(4,undef,8);
186 Function: prints the sequence as string. The difference between labelsubseq
187 and normal subseq is that it uses /labels/ as arguments, instead
188 than positions. This allows for faster and more efficient lookup,
189 skipping the (usually) lengthy conversion of positions into labels.
190 This is expecially useful for manipulating with high power
191 LiveSeq objects, knowing the labels and exploiting their
192 usefulness.
193 Returns : a string
194 Errorcode -1
195 Args : without arguments it returns the entire sequence
196 with a startlabel it returns the sequence downstream that label
197 if a length is specified, it returns only that number of bases
198 if an endlabel is specified, it overrides the length argument
199 and prints instead up to that label (included)
200 Defaults: $startlabel defaults to the beginning of the entire sequence
201 $endlabel defaults to the end of the entire sequence
202
203 =cut
204
205 # NOTE: unsecuremode is to be used /ONLY/ if sure of the start and end labels, expecially that they follow each other in the correct order!!!!
206
207 sub labelsubseq {
208 my ($self,$start,$length,$end,$unsecuremode) = @_;
209 if (defined $unsecuremode && $unsecuremode eq "unsecuremoderequested")
210 { # to skip security checks (faster)
211 unless ($start) {
212 $start=$self->start;
213 }
214 if ($end) {
215 if ($end == $start) {
216 $length=1;
217 undef $end;
218 } else {
219 undef $length;
220 }
221 } else {
222 unless ($length) {
223 $end=$self->end;
224 }
225 }
226 } else {
227 if ($start) {
228 unless ($self->{'seq'}->valid($start)) {
229 $self->warn("Start label not valid"); return (-1);
230 }
231 }
232 if ($end) {
233 if ($end == $start) {
234 $length=1;
235 undef $end;
236 } else {
237 unless ($self->{'seq'}->valid($end)) {
238 $self->warn("End label not valid"); return (-1);
239 }
240 unless ($self->follows($start,$end) == 1) {
241 $self->warn("End label does not follow Start label!"); return (-1);
242 }
243 undef $length;
244 }
245 }
246 }
247 if ($self->strand() == 1) {
248 return $self->{'seq'}->down_chain2string($start,$length,$end);
249 } else { # reverse strand
250 my $str = $self->{'seq'}->up_chain2string($start,$length,$end);
251 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
252 return $str;
253 }
254 }
255
256 =head2 subseq
257
258 Title : subseq
259 Usage : $substring = $obj->subseq(10,40);
260 : $substring = $obj->subseq(10,undef,4);
261 Function: returns the subseq from start to end, where the first base
262 is 1 and the number is inclusive, ie 1-2 are the first two
263 bases of the sequence
264
265 Start cannot be larger than end but can be equal.
266
267 Allows for negative numbers $obj->subseq(-10,-1). By
268 definition, there is no 0!
269 -5 -1 1 5
270 gctagcgcccaac atggctcgctg
271
272 This allows to retrieve sequences upstream from given position.
273
274 The precedence is from left to right: if END is given LENGTH is
275 ignored.
276
277 Examples: $obj->subseq(-10,undef,10) returns 10 elements before position 1
278 $obj->subseq(4,8) returns elements from the 4th to the 8th, inclusive
279
280 Returns : a string
281 Errorcode: -1
282 Args : start, integer, defaults to start of the sequence
283 end, integer, '' or undef, defaults to end of the sequence
284 length, integer, '' or undef
285 an optional strand (1 or -1) 4th argument
286 if strand argument is not given, it will default to the object
287 argment. This argument is useful when a call is issued from a child
288 of a parent object containing the subseq method
289
290 =cut
291
292 #'
293 # check the fact about reverse strand!
294 # is it feasible? Is it correct? Should we do it? How about exons? Does it
295 # work when you ask subseq of an exon?
296 # eliminated now (Mon night)
297 sub subseq {
298 ##my ($self,$pos1,$pos2,$length,$strand) = @_;
299 my ($self,$pos1,$pos2,$length,$strand) = @_;
300 ##unless (defined ($strand)) { # if optional [strand] argument not given
301 ## $strand=$self->strand;
302 ##}
303 $strand=$self->strand;
304 my ($str,$startlabel,$endlabel);
305 if (defined ($length)) {
306 if ($length < 1) {
307 $self->warn("No sense asking for a subseq of length < 1");
308 return (-1);
309 }
310 }
311 unless (defined ($pos1)) {
312 #print "\n##### DEBUG pos1 not defined\n";
313 $startlabel=$self->start;
314 } else {
315 if ($pos1 == 0) { # if position = 0 complain
316 $self->warn("Position cannot be 0!"); return (-1);
317 }
318 ##if ($strand == 1) { # CHECK THIS!
319 if ((defined ($pos2))&&($pos1>$pos2)) {
320 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
321 }
322 ##} else { # CHECK THIS!
323 ## if ((defined ($pos2))&&($pos1<$pos2)) {
324 ## $self->warn("1st position($pos1) cannot be < 2nd position($pos2) on reverse strand!)"; return (-1);
325 ## }
326 ##}
327 $startlabel=$self->label($pos1);
328 if ($startlabel < 1) {
329 $self->warn("position $pos1 not valid as start of subseq!"); return (-1);
330 }
331 }
332 unless (defined ($pos2)) {
333 #print "\n##### pos2 not defined\n";
334 unless (defined ($length)) {
335 $endlabel=$self->end;
336 }
337 } else {
338 if ($pos2 == 0) { # if position = 0 complain
339 $self->warn("Position cannot be 0!"); return (-1);
340 }
341 undef $length;
342 ##if ($strand == 1) { # CHECK THIS!
343 if ((defined ($pos1))&&($pos1>$pos2)) {
344 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
345 }
346 ##} else { # CHECK THIS!
347 ## if ((defined ($pos1))&&($pos1<$pos2)) {
348 ## $self->warn("1st position($pos1) cannot be < 2nd position($pos2) on reverse strand!"); return (-1);
349 ## }
350 ##}
351 $endlabel=$self->label($pos2);
352 if ($endlabel < 1) {
353 $self->warn("position $pos2 not valid as end of subseq!"); return (-1);
354 }
355 }
356 #print "\n ####DEBUG: start $startlabel end $endlabel length $length strand $strand\n";
357
358 if ($strand == 1) {
359 $str = $self->{'seq'}->down_chain2string($startlabel,$length,$endlabel);
360 } else { # reverse strand
361 $str = $self->{'seq'}->up_chain2string($startlabel,$length,$endlabel);
362 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
363 }
364 return $str;
365 }
366
367 =head2 length
368
369 Title : length
370 Usage : $seq->length();
371 Function: returns the number of nucleotides (or the number of aminoacids)
372 in the entire sequence
373 Returns : an integer
374 Errorcode -1
375 Args : none
376
377 =cut
378
379 sub length {
380 my $self=shift;
381 my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand());
382 if ($strand == 1) {
383 return $self->{'seq'}->down_subchain_length($start,$end);
384 } else {
385 return $self->{'seq'}->up_subchain_length($start,$end);
386 }
387 }
388
389 =head2 display_id
390
391 Title : display_id
392 Usage : $id_string = $obj->display_id();
393 Function: returns the display id, alias the common name of the object
394
395 The semantics of this is that it is the most likely string
396 to be used as an identifier of the sequence, and likely to
397 have "human" readability. The id is equivalent to the ID
398 field of the GenBank/EMBL databanks and the id field of the
399 Swissprot/sptrembl database. In fasta format, the >(\S+) is
400 presumed to be the id, though some people overload the id
401 to embed other information.
402
403 See also: accession_number
404 Returns : a string
405 Args : none
406
407 =cut
408
409 sub display_id {
410 my ($self,$value) = @_;
411 if(defined $value) {
412 $self->{'display_id'} = $value;
413 }
414 return $self->{'display_id'};
415 }
416
417
418 =head2 accession_number
419
420 Title : accession_number
421 Usage : $unique_biological_key = $obj->accession_number;
422 Function: Returns the unique biological id for a sequence, commonly
423 called the accession_number.
424 Notice that primary_id() provides the unique id for the
425 implemetation, allowing multiple objects to have the same accession
426 number in a particular implementation.
427
428 For objects with no accession_number this method returns "unknown".
429 Returns : a string
430 Args : none
431
432 =cut
433
434 sub accession_number {
435 my ($self,$value) = @_;
436 if (defined $value) {
437 $self->{'accession_number'} = $value;
438 }
439 unless (exists $self->{'accession_number'}) {
440 return "unknown";
441 } else {
442 return $self->{'accession_number'};
443 }
444 }
445
446 =head2 primary_id
447
448 Title : primary_id
449 Usage : $unique_implementation_key = $obj->primary_id;
450 Function: Returns the unique id for this object in this
451 implementation. This allows implementations to manage their own
452 object ids in a way the implementation can control. Clients can
453 expect one id to map to one object.
454
455 For sequences with no primary_id, this method returns
456 a stringified memory location.
457
458 Returns : A string
459 Args : None
460
461 =cut
462
463
464 sub primary_id {
465 my ($self,$value) = @_;
466 if(defined $value) {
467 $self->{'primary_id'} = $value;
468 }
469 unless (exists $self->{'primary_id'}) {
470 return "$self";
471 } else {
472 return $self->{'primary_id'};
473 }
474 }
475
476 =head2 change
477
478 Title : change
479 Usage : $substring = $obj->change('AA', 10);
480 Function: changes, modifies, mutates the LiveSequence
481 Examples:
482 $obj->change('', 10); delete nucleotide #10
483 $obj->change('', 10, 2); delete two nucleotides starting from #10
484 $obj->change('G', 10); change nuc #10 to 'G'
485 $obj->change('GA', 10, 4); replace #10 and 3 following with 'GA'
486 $obj->change('GA', 10, 2)); is same as $obj->change('GA', 10);
487 $obj->change('GA', 10, 0 ); insert 'GA' before nucleotide at #10
488 $obj->change('GA', 10, 1); GA inserted before #10, #10 deleted
489 $obj->change('GATC', 10, 2); GATC inserted before #10, #10&#11 deleted
490 $obj->change('GATC', 10, 6); GATC inserted before #10, #10-#15 deleted
491
492
493 Returns : a string of deleted bases (if any) or 1 (everything OK)
494 Errorcode: -1
495 Args : seq, string, or '' ('' = undef = 0 = deletion)
496 start, integer
497 length, integer (optional)
498
499 =cut
500
501 sub change {
502 &positionchange;
503 }
504
505 =head2 positionchange
506
507 Title : positionchange
508 Function: Exactly like change. I.e. change() defaults to positionchange()
509
510 =cut
511
512 sub positionchange {
513 my ($self,$newseq,$position,$length)=@_;
514 unless ($position) {
515 $self->warn("Position not given or position 0");
516 return (-1);
517 }
518 my $label=$self->label($position);
519 unless ($label > 0) { # label not found or error
520 $self->warn("No valid label found at that position!");
521 return (-1);
522 }
523 return ($self->labelchange($newseq,$label,$length));
524 }
525
526 =head2 labelchange
527
528 Title : labelchange
529 Function: Exactly like change but uses a /label/ instead than a position
530 as second argument. This allows for multiple changes in a LiveSeq
531 without the burden of recomputing positions. I.e. for a multiple
532 change in two different points of the LiveSeq, the approach would
533 be the following: fetch the correct labels out of the two different
534 positions (method: label($position)) and then use the labelchange()
535 method to modify the sequence using those labels instead than
536 relying on the positions (that would have modified after the
537 first change).
538
539 =cut
540
541 sub labelchange {
542 my ($self,$newseq,$label,$length)=@_;
543 unless ($self->valid($label)) {
544 if ($self->{'seq'}->valid($label)) {
545 #$self->warn("Label \'$label\' not valid for executing a LiveSeq change for the object asked but it's ok for DNAlevel change, reverting to that");
546 shift @_;
547 return($self->{'seq'}->labelchange(@_));
548 } else {
549 $self->warn("Label \'$label\' not valid for executing a LiveSeq change");
550 return (-1);
551 }
552 }
553 unless ($newseq) { # it means this is a simple deletion
554 if (defined($length)) {
555 unless ($length >= 0) {
556 $self->warn("No sense having length < 0 in a deletion");
557 return (-1);
558 }
559 } else {
560 $self->warn("Length not defined for deletion!");
561 return (-1);
562 }
563 return $self->_delete($label,$length);
564 }
565 my $newseqlength=CORE::length($newseq);
566 if (defined($length)) {
567 unless ($length >= 0) {
568 $self->warn("No sense having length < 0 in a change()");
569 return (-1);
570 }
571 } else {
572 $length=$newseqlength; # defaults to pointmutation(s)
573 }
574 if ($length == 0) { # it means this is a simple insertion, length def&==0
575 my ($insertbegin,$insertend)=$self->_praeinsert($label,$newseq);
576 if ($insertbegin == -1) {
577 return (-1);
578 } else {
579 return (1);
580 }
581 }
582 if ($newseqlength == $length) { # it means this is simple pointmutation(s)
583 return $self->_mutate($label,$newseq,$length);
584 }
585 # if we arrived here then change is complex mixture
586 my $strand=$self->strand();
587 my $afterendlabel=$self->label($length+1,$label,$strand); # get the label at $length+1 positions after $label
588 unless ($afterendlabel > 0) { # label not found or error
589 $self->warn("No valid afterendlabel found for executing the complex mutation!");
590 return (-1);
591 }
592 my $deleted=$self->_delete($label,$length); # first delete length nucs
593 if ($deleted == -1) { # if errors
594 return (-1);
595 } else { # then insert the newsequence
596 my ($insertbegin,$insertend)=$self->_praeinsert($afterendlabel,$newseq);
597 if ($insertbegin == -1) {
598 return (-1);
599 } else {
600 return (1);
601 }
602 }
603 }
604
605 # internal methods for change()
606
607 # arguments: label for beginning of deletion, new sequence to insert
608 # returns: labels of beginning and end of the inserted sequence
609 # errorcode: -1
610 sub _praeinsert {
611 my ($self,$label,$newseq)=@_;
612 my ($insertbegin,$insertend);
613 my $strand=$self->strand();
614 if ($strand == 1) {
615 ($insertbegin,$insertend)=($self->{'seq'}->praeinsert_string($newseq,$label));
616 } else { # since it's reverse strand and we insert in forward direction....
617 $newseq=reverse($newseq);
618 $newseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases
619 ($insertend,$insertbegin)=($self->{'seq'}->postinsert_string($newseq,$label));
620 }
621 if (($insertbegin==0)||($insertend==0)) {
622 $self->warn("Some error occurred while inserting!");
623 return (-1);
624 } else {
625 return ($insertbegin,$insertend);
626 }
627 }
628
629 # arguments: label for beginning of deletion, length of deletion
630 # returns: string of deleted bases
631 # errorcode: -1
632 sub _delete {
633 my ($self,$label,$length)=@_;
634 my $strand=$self->strand();
635 my $endlabel=$self->label($length,$label,$strand); # get the label at $length positions after $label
636 unless ($endlabel > 0) { # label not found or error
637 $self->warn("No valid endlabel found for executing the deletion!");
638 return (-1);
639 }
640 # this is important in Transcript to fix exon structure
641 $self->_deletecheck($label,$endlabel);
642 my $deletedseq;
643 if ($strand == 1) {
644 $deletedseq=$self->{'seq'}->splice_chain($label,undef,$endlabel);
645 } else {
646 $deletedseq=$self->{'seq'}->splice_chain($endlabel,undef,$label);
647 $deletedseq=reverse($deletedseq); # because we are on reverse strand and we cut anyway
648 # in forward direction
649 $deletedseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases
650 }
651 return ($deletedseq);
652 }
653
654 # empty function, overridden in Transcript, not useful here
655 sub _deletecheck {
656 }
657
658 # arguments: label for beginning of mutation, newsequence, number of mutations
659 # returns: 1 all OK
660 # errorcode: -1
661 sub _mutate {
662 my ($self,$label,$newseq,$length)=@_; # length is equal to length(newseq)
663 my ($i,$base,$nextlabel);
664 my @labels; # array of labels
665 my $strand=$self->strand();
666 if ($length == 1) { # special cases first
667 @labels=($label);
668 } else {
669 my $endlabel=$self->label($length,$label,$strand); # get the label at $length positions after $label
670 unless ($endlabel > 0) { # label not found or error
671 $self->warn("No valid endlabel found for executing the mutation!");
672 return (-1);
673 }
674 if ($length == 2) { # another special case
675 @labels=($label,$endlabel);
676 } else { # more than 3 bases changed
677 # this wouldn't work for Transcript
678 #my $labelsarrayref;
679 #if ($strand == 1) {
680 #$labelsarrayref=$self->{'seq'}->down_labels($label,$endlabel);
681 #} else {
682 #$labelsarrayref=$self->{'seq'}->up_labels($label,$endlabel);
683 #}
684 #@labels=@{$labelsarrayref};
685 #if ($length != scalar(@labels)) { # not enough labels returned
686 #$self->warn("Not enough valid labels found for executing the mutation!");
687 #return (-1);
688 #}
689
690 # this should be more general
691 @labels=($label); # put the first one
692 while ($label != $endlabel) {
693 $nextlabel=$self->label(2,$label,$strand); # retrieve the next label
694 push (@labels,$nextlabel);
695 $label=$nextlabel; # move on reference
696 }
697 }
698 }
699 if ($strand == -1) { # only for reverse strand
700 $newseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases
701 }
702 my $errorcheck; # if not equal to $length after summing for all changes, error did occurr
703 $i = 0;
704 foreach $base (split(//,$newseq)) {
705 $errorcheck += $self->{'seq'}->set_value_at_label($base,$labels[$i]);
706 $i++;
707 }
708 if ($errorcheck != $length) {
709 $self->warn("Some error occurred while mutating!");
710 return (-1);
711 } else {
712 return (1);
713 }
714 }
715
716 =head2 valid
717
718 Title : valid
719 Usage : $boolean = $obj->valid($label)
720 Function: tests if a label exists inside the object
721 Returns : boolean
722 Args : label
723
724 =cut
725
726 # argument: label
727 # returns: 1 YES 0 NO
728 sub valid {
729 my ($self,$label)=@_;
730 my $checkme;
731 my @labels=$self->all_labels;
732 foreach $checkme (@labels) {
733 if ($label == $checkme) {
734 return (1); # found
735 }
736 }
737 return (0); # not found
738 }
739
740
741 =head2 start
742
743 Title : start
744 Usage : $startlabel=$obj->start()
745 Function: returns the label of the first nucleotide of the object (exon, CDS)
746 Returns : label
747 Args : none
748
749 =cut
750
751 sub start {
752 my ($self) = @_;
753 return $self->{'start'}; # common for all classes BUT DNA (which redefines it) and Transcript (that takes the information from the Exons)
754 }
755
756 =head2 end
757
758 Title : end
759 Usage : $endlabel=$obj->end()
760 Function: returns the label of the last nucleotide of the object (exon, CDS)
761 Returns : label
762 Args : none
763
764 =cut
765
766 sub end {
767 my ($self) = @_;
768 return $self->{'end'};
769 }
770
771 =head2 strand
772
773 Title : strand
774 Usage : $strand=$obj->strand()
775 $obj->strand($strand)
776 Function: gets or sets strand information, being 1 or -1 (forward or reverse)
777 Returns : -1 or 1
778 Args : none OR -1 or 1
779
780 =cut
781
782 sub strand {
783 my ($self,$strand) = @_;
784 if ($strand) {
785 if (($strand != 1)&&($strand != -1)) {
786 $self->warn("strand information not changed because strand identifier not valid");
787 } else {
788 $self->{'strand'} = $strand;
789 }
790 }
791 return $self->{'strand'};
792 }
793
794 =head2 alphabet
795
796 Title : alphabet
797 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
798 Function: Returns the type of sequence being one of
799 'dna', 'rna' or 'protein'. This is case sensitive.
800
801 Returns : a string either 'dna','rna','protein'.
802 Args : none
803 Note : "circular dna" is set as dna
804
805 =cut
806
807
808 sub alphabet {
809 my %valid_type = map {$_, 1} qw( dna rna protein );
810 my ($self,$value) = @_;
811 if (defined $value) {
812 $value =~ s/circular dna/dna/;
813 unless ( $valid_type{$value} ) {
814 $self->warn("Molecular type '$value' is not a valid type");
815 }
816 $self->{'alphabet'} = $value;
817 }
818 return $self->{'alphabet'};
819 }
820
821 =head2 coordinate_start
822
823 Title : coordinate_start
824 Usage : $coordstartlabel=$obj->coordinate_start()
825 : $coordstartlabel=$obj->coordinate_start($label)
826 Function: returns and optionally sets the first label of the coordinate
827 system used
828 For some objects only labels inside the object or in frame (for
829 Translation objects) will be allowed to get set as coordinate start
830
831 Returns : label. It returns 0 if label not found.
832 Errorcode -1
833 Args : an optional reference $label that is position 1
834
835 =cut
836
837
838 sub coordinate_start {
839 my ($self,$label) = @_;
840 if ($label) {
841 if ($self->valid($label)) {
842 $self->{'coordinate_start'} = $label;
843 } else {
844 $self->warn("The label you are trying to set as coordinate_start is not valid for this object");
845 }
846 }
847 my $coord_start = $self->{'coordinate_start'};
848 if ($coord_start) {
849 return $coord_start;
850 } else {
851 return $self->start();
852 }
853 }
854
855 =head2 label
856
857 Title : label
858 Usage : $seq->label($position)
859 : $seq->label($position,$firstlabel)
860 Examples: $nextlabel=$seq->label(2,$label) -> retrieves the following label
861 : $prevlabel=$seq->label(-1,$label) -> retrieves the preceding label
862
863 Function: returns the label of the nucleotide at $position from current
864 coordinate start
865 Returns : a label. It returns 0 if label not found.
866 Errorcode -1
867 Args : a position,
868 an optional reference $firstlabel that is to be used as position 1
869 an optional strand (1 or -1) argument
870 if strand argument is not given, it will default to the object
871 argument. This argument is useful when a call is issued from a child
872 of a parent object containing the subseq method
873
874 =cut
875
876
877 sub label {
878 my ($self,$position,$firstlabel,$strand)=@_;
879 my $label;
880 unless (defined ($firstlabel)) {
881 $firstlabel=$self->coordinate_start;
882 }
883 unless ($position) { # if position = 0 complain ?
884 $self->warn("Position not given or position 0");
885 return (-1);
886 }
887 unless (defined ($strand)) { # if optional [strand] argument not given
888 $strand=$self->strand;
889 }
890 if ($strand == 1) {
891 if ($position > 0) {
892 $label=$self->{'seq'}->down_get_label_at_pos($position,$firstlabel)
893 } else { # if < 0
894 $label=$self->{'seq'}->up_get_label_at_pos(1 - $position,$firstlabel)
895 }
896 } else {
897 if ($position > 0) {
898 $label=$self->{'seq'}->up_get_label_at_pos($position,$firstlabel)
899 } else { # if < 0
900 $label=$self->{'seq'}->down_get_label_at_pos(1 - $position,$firstlabel)
901 }
902 }
903 return $label;
904 }
905
906
907 =head2 position
908
909 Title : position
910 Usage : $seq->position($label)
911 : $seq->position($label,$firstlabel)
912 Function: returns the position of nucleotide at $label
913 Returns : the position of the label from current coordinate start
914 Errorcode 0
915 Args : a label pointing to a certain nucleotide (e.g. start of exon)
916 an optional "firstlabel" as reference to count from
917 an optional strand (1 or -1) argument
918 if strand argument is not given, it will default to the object
919 argument. This argument is useful when a call is issued from a child
920 of a parent object containing the subseq method
921
922 =cut
923
924
925 sub position {
926 my ($self,$label,$firstlabel,$strand)=@_;
927 unless (defined ($strand)) { # if optional [strand] argument not given
928 $strand=$self->strand;
929 }
930 unless (defined ($firstlabel)) {
931 $firstlabel=$self->coordinate_start;
932 }
933 unless ($self->valid($label)) {
934 $self->warn("label not valid");
935 return (0);
936 }
937 if ($firstlabel == $label) {
938 return (1);
939 }
940 my ($coordpos,$position0,$position);
941 $position0=$self->{'seq'}->down_get_pos_of_label($label);
942 $coordpos=$self->{'seq'}->down_get_pos_of_label($firstlabel);
943 $position=$position0-$coordpos+1;
944 if ($position <= 0) {
945 $position--;
946 }
947 if ($strand == -1) {
948 #print "\n----------DEBUGSEQPOS label $label firstlabel $firstlabel strand $strand: position=",1-$position;
949 return (1-$position);
950 } else {
951 #print "\n----------DEBUGSEQPOS label $label firstlabel $firstlabel strand $strand: position=",$position;
952 return ($position);
953 }
954 }
955
956 =head2 follows
957
958 Title : follows
959 Usage : $seq->follows($firstlabel,$secondlabel)
960 : $seq->follows($firstlabel,$secondlabel,$strand)
961 Function: checks if SECONDlabel follows FIRSTlabel, undependent of the strand
962 i.e. it checks downstream for forward strand and
963 upstream for reverse strand
964 Returns : 1 or 0
965 Errorcode -1
966 Args : two labels
967 an optional strand (1 or -1) argument
968 if strand argument is not given, it will default to the object
969 argument. This argument is useful when a call is issued from a child
970 of a parent object containing the subseq method
971
972 =cut
973
974 #'
975 # wraparound to is_downstream and is_upstream that chooses the correct one
976 # depending on the strand
977 sub follows {
978 my ($self,$firstlabel,$secondlabel,$strand)=@_;
979 unless (defined ($strand)) { # if optional [strand] argument not given
980 $strand=$self->strand;
981 }
982 if ($strand == 1) {
983 return ($self->{'seq'}->is_downstream($firstlabel,$secondlabel));
984 } else {
985 return ($self->{'seq'}->is_upstream($firstlabel,$secondlabel));
986 }
987 }
988 #
989 #=head2 translate
990 #
991 # Title : translate
992 # Usage : $protein_seq = $obj->translate
993 # Function: Provides the translation of the DNA sequence
994 # using full IUPAC ambiguities in DNA/RNA and amino acid codes.
995 #
996 # The resulting translation is identical to EMBL/TREMBL database
997 # translations.
998 #
999 # Returns : a string
1000 # Args : character for terminator (optional) defaults to '*'
1001 # character for unknown amino acid (optional) defaults to 'X'
1002 # frame (optional) valid values 0, 1, 3, defaults to 0
1003 # codon table id (optional) defaults to 1
1004 #
1005 #=cut
1006 #
1007 #sub translate {
1008 # my ($self) = shift;
1009 # return ($self->translate_string($self->seq,@_));
1010 #}
1011 #
1012 #=head2 translate_string
1013 #
1014 # Title : translate_string
1015 # Usage : $protein_seq = $obj->translate_string("attcgtgttgatcgatta");
1016 # Function: Like translate, but can be used to translate subsequences after
1017 # having retrieved them as string.
1018 # Args : 1st argument is a string. Optional following arguments: like in
1019 # the translate method
1020 #
1021 #=cut
1022 #
1023 #
1024 #sub translate_string {
1025 # my($self) = shift;
1026 # my($seq) = shift;
1027 # my($stop, $unknown, $frame, $tableid) = @_;
1028 # my($i, $len, $output) = (0,0,'');
1029 # my($codon) = "";
1030 # my $aa;
1031 #
1032 #
1033 # ## User can pass in symbol for stop and unknown codons
1034 # unless(defined($stop) and $stop ne '') { $stop = "*"; }
1035 # unless(defined($unknown) and $unknown ne '') { $unknown = "X"; }
1036 # unless(defined($frame) and $frame ne '') { $frame = 0; }
1037 #
1038 # ## the codon table ID
1039 # if ($self->translation_table) {
1040 # $tableid = $self->translation_table;
1041 # }
1042 # unless(defined($tableid) and $tableid ne '') { $tableid = 1; }
1043 #
1044 # ##Error if monomer is "Amino"
1045 # $self->warn("Can't translate an amino acid sequence.")
1046 # if (defined $self->alphabet && $self->alphabet eq 'protein');
1047 #
1048 # ##Error if frame is not 0, 1 or 2
1049 # $self->warn("Valid values for frame are 0, 1, 2, not [$frame].")
1050 # unless ($frame == 0 or $frame == 1 or $frame == 2);
1051 #
1052 # #thows a warning if ID is invalid
1053 # my $codonTable = Bio::Tools::CodonTable->new( -id => $tableid);
1054 #
1055 # # deal with frame offset.
1056 # if( $frame ) {
1057 # $seq = substr ($seq,$frame);
1058 # }
1059 #
1060 # for $codon ( grep { CORE::length == 3 } split(/(.{3})/, $seq) ) {
1061 # my $aa = $codonTable->translate($codon);
1062 # if ($aa eq '*') {
1063 # $output .= $stop;
1064 # }
1065 # elsif ($aa eq 'X') {
1066 # $output .= $unknown;
1067 # }
1068 # else {
1069 # $output .= $aa ;
1070 # }
1071 # }
1072 # #if( substr($output,-1,1) eq $stop ) {
1073 # # chop $output;
1074 # #}
1075 #
1076 # return ($output);
1077 #}
1078
1079 =head2 gene
1080
1081 Title : gene
1082 Usage : my $gene=$obj->gene;
1083 Function: Gets or sets the reference to the LiveSeq::Gene object.
1084 Objects that are features of a LiveSeq Gene will have this
1085 attribute set automatically.
1086
1087 Returns : reference to an object of class Gene
1088 Note : if Gene object is not set, this method will return 0;
1089 Args : none or reference to object of class Bio::LiveSeq::Gene
1090
1091 =cut
1092
1093 sub gene {
1094 my ($self,$value) = @_;
1095 if (defined $value) {
1096 $self->{'gene'} = $value;
1097 }
1098 unless (exists $self->{'gene'}) {
1099 return (0);
1100 } else {
1101 return $self->{'gene'};
1102 }
1103 }
1104
1105 =head2 obj_valid
1106
1107 Title : obj_valid
1108 Usage : if ($obj->obj_valid) {do something;}
1109 Function: Checks if start and end labels are still valid for the ojbect,
1110 i.e. tests if the LiveSeq object is still valid
1111 Returns : boolean
1112 Args : none
1113
1114 =cut
1115
1116 sub obj_valid {
1117 my $self=shift;
1118 unless (($self->{'seq'}->valid($self->start()))&&($self->{'seq'}->valid($self->end()))) {
1119 return (0);
1120 }
1121 return (1);
1122 }
1123
1124 =head2 name
1125
1126 Title : name
1127 Usage : $name = $obj->name;
1128 : $name = $obj->name("ABCD");
1129 Function: Returns or sets the name of the object.
1130 If there is no name, it will return "unknown";
1131 Returns : A string
1132 Args : None
1133
1134 =cut
1135
1136 sub name {
1137 my ($self,$value) = @_;
1138 if (defined $value) {
1139 $self->{'name'} = $value;
1140 }
1141 unless (exists $self->{'name'}) {
1142 return "unknown";
1143 } else {
1144 return $self->{'name'};
1145 }
1146 }
1147
1148 =head2 desc
1149
1150 Title : desc
1151 Usage : $desc = $obj->desc;
1152 : $desc = $obj->desc("ABCD");
1153 Function: Returns or sets the description of the object.
1154 If there is no description, it will return "unknown";
1155 Returns : A string
1156 Args : None
1157
1158 =cut
1159
1160 sub desc {
1161 my ($self,$value) = @_;
1162 if (defined $value) {
1163 $self->{'desc'} = $value;
1164 }
1165 unless (exists $self->{'desc'}) {
1166 return "unknown";
1167 } else {
1168 return $self->{'desc'};
1169 }
1170 }
1171
1172 =head2 source
1173
1174 Title : source
1175 Usage : $name = $obj->source;
1176 : $name = $obj->source("Homo sapiens");
1177 Function: Returns or sets the organism that is source of the object.
1178 If there is no source, it will return "unknown";
1179 Returns : A string
1180 Args : None
1181
1182 =cut
1183
1184 sub source {
1185 my ($self,$value) = @_;
1186 if (defined $value) {
1187 $self->{'source'} = $value;
1188 }
1189 unless (exists $self->{'source'}) {
1190 return "unknown";
1191 } else {
1192 return $self->{'source'};
1193 }
1194 }
1195
1196 sub delete_Obj {
1197 my $self = shift;
1198 my @values= values %{$self};
1199 my @keys= keys %{$self};
1200
1201 foreach my $key ( @keys ) {
1202 delete $self->{$key};
1203 }
1204 foreach my $value ( @values ) {
1205 if (index(ref($value),"LiveSeq") != -1) { # object case
1206 eval {
1207 # delete $self->{$value};
1208 $value->delete_Obj;
1209 };
1210 } elsif (index(ref($value),"ARRAY") != -1) { # array case
1211 my @array=@{$value};
1212 my $element;
1213 foreach $element (@array) {
1214 eval {
1215 $element->delete_Obj;
1216 };
1217 }
1218 } elsif (index(ref($value),"HASH") != -1) { # object case
1219 my %hash=%{$value};
1220 my $element;
1221 foreach $element (%hash) {
1222 eval {
1223 $element->delete_Obj;
1224 };
1225 }
1226 }
1227 }
1228 return(1);
1229 }
1230
1231 1;