Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/LiveSeq/Chain.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 #!/usr/bin/perl | |
2 # $Id: Chain.pm,v 1.12 2001/06/18 08:27:53 heikki Exp $ | |
3 # | |
4 # bioperl module for Bio::LiveSeq::Chain | |
5 # | |
6 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net> | |
7 # | |
8 # Copyright Joseph Insana | |
9 # | |
10 # You may distribute this module under the same terms as perl itself | |
11 # | |
12 # POD documentation - main docs before the code | |
13 # | |
14 | |
15 =head1 NAME | |
16 | |
17 Bio::LiveSeq::Chain - DoubleChain DataStructure for Perl | |
18 | |
19 =head1 SYNOPSIS | |
20 | |
21 #documentation needed | |
22 | |
23 =head1 DESCRIPTION | |
24 | |
25 This is a general purpose module (that's why it's not in object-oriented | |
26 form) that introduces a novel datastructure in PERL. It implements | |
27 the "double linked chain". The elements of the chain can contain basically | |
28 everything. From chars to strings, from object references to arrays or hashes. | |
29 It is used in the LiveSequence project to create a dynamical DNA sequence, | |
30 easier to manipulate and change. It's use is mainly for sequence variation | |
31 analysis but it could be used - for example - in e-cell projects. | |
32 The Chain module in itself doesn't have any biological bias, so can be | |
33 used for any programming purpose. | |
34 | |
35 Each element of the chain (with the exclusion of the first and the last of the | |
36 chain) is connected to other two elements (the PREVious and the NEXT one). | |
37 There is no absolute position (like in an array), hence if positions are | |
38 important, they need to be computed (methods are provided). | |
39 Otherwise it's easy to keep track of the elements with their "LABELs". | |
40 There is one LABEL (think of it as a pointer) to each ELEMENT. The labels | |
41 won't change after insertions or deletions of the chain. So it's | |
42 always possible to retrieve an element even if the chain has been | |
43 modified by successive insertions or deletions. | |
44 From this the high potential profit for bioinformatics: dealing with | |
45 sequences in a way that doesn't have to rely on positions, without | |
46 the need of constantly updating them if the sequence changes, even | |
47 dramatically. | |
48 | |
49 =head1 AUTHOR - Joseph A.L. Insana | |
50 | |
51 Email: Insana@ebi.ac.uk, jinsana@gmx.net | |
52 | |
53 Address: | |
54 | |
55 EMBL Outstation, European Bioinformatics Institute | |
56 Wellcome Trust Genome Campus, Hinxton | |
57 Cambs. CB10 1SD, United Kingdom | |
58 | |
59 =head1 APPENDIX | |
60 | |
61 The rest of the documentation details each of the object | |
62 methods. Internal methods are usually preceded with a _ | |
63 | |
64 =cut | |
65 | |
66 # Let the code begin... | |
67 | |
68 # DoubleChain Data Structure for PERL | |
69 # by Joseph A.L. Insana - Deathson - Filius Mortis - Fal Mortais | |
70 # insana@ebi.ac.uk, jinsana@gmx.net | |
71 | |
72 package Bio::LiveSeq::Chain; | |
73 # Version history: | |
74 # Fri Mar 10 16:46:51 GMT 2000 v1.0 begun working on chains in perl | |
75 # Sat Mar 11 05:47:21 GMT 2000 v.1.4 working on splice method | |
76 # Sun Mar 12 14:08:31 GMT 2000 v.1.5 | |
77 # Sun Mar 12 17:21:51 GMT 2000 v.2.0 splice method working, is_updownstream made | |
78 # Sun Mar 12 18:11:22 GMT 2000 v.2.04 wrapped all in package Chain.pm | |
79 # Sun Mar 12 18:49:23 GMT 2000 v.2.08 added elements() | |
80 # Sun Mar 12 21:18:04 GMT 2000 v.2.1 done array2dchain, working on *insert* | |
81 # Sun Mar 12 23:04:40 GMT 2000 v.2.16 done *insert*, up_element, create_elems | |
82 # Sun Mar 12 23:45:32 GMT 2000 v.2.17 debugged and checked | |
83 # Mon Mar 13 00:44:51 GMT 2000 v.2.2 added mutate() | |
84 # Mon Mar 13 02:00:32 GMT 2000 v 2.21 added invert_dchain() | |
85 # Mon Mar 13 03:01:21 GMT 2000 v 2.22 created updown_chain2string | |
86 # Mon Mar 13 03:45:50 GMT 2000 v.2.24 added subchain_length() | |
87 # Mon Mar 13 17:25:04 GMT 2000 v.2.26 added element_at_pos and pos_of_element | |
88 # Wed Mar 15 23:05:06 GMT 2000 v.2.27 use strict enforced | |
89 # Thu Mar 16 19:05:34 GMT 2000 v.2.3 changed dchain->chain everywhere | |
90 # Fri Mar 17 01:48:36 GMT 2000 v.2.33 mutate_element renamed, created new | |
91 # methods: set_value, get_value... | |
92 # Fri Mar 17 05:03:15 GMT 2000 v.2.4 set_value_at_pos, get_value_at_pos | |
93 # get_label_at_pos... | |
94 # Fri Mar 17 15:51:07 GMT 2000 v.2.41 renamed pos_of_element -> get_pos_of_label | |
95 # Fri Mar 17 18:10:36 GMT 2000 v.2.44 recoded subchain_length and pos_of_label | |
96 # Fri Mar 17 20:12:27 GMT 2000 v.2.5 NAMING change: index->label everywhere | |
97 # Mon Mar 20 18:33:10 GMT 2000 v.2.52 label_exists(), start(), end() | |
98 # Mon Mar 20 23:10:28 GMT 2000 v.2.6 labels() created | |
99 # Wed Mar 22 18:35:17 GMT 2000 v.2.61 chain2string() rewritten | |
100 # Tue Dec 12 14:47:58 GMT 2000 v 2.66 optimized with /use integer/ | |
101 # Tue Dec 12 16:28:45 GMT 2000 v 2.7 rewritten comments to methods in pod style | |
102 | |
103 # | |
104 $VERSION=2.7; | |
105 # | |
106 # TODO_list: | |
107 # **** cleanup code | |
108 # **** performance concerns | |
109 # *??* create hash2dchain ???? (with hashkeys used for label) | |
110 # **????** how about using array of arrays instead than hash of arrays?? | |
111 # | |
112 # further strict complaints: | |
113 # in verbose $string assignment around line 721 ??? | |
114 | |
115 # TERMINOLOGY update, naming convention: | |
116 # "chain" the datastructure | |
117 # "element" the individual units that compose a chain | |
118 # "label" the unique name of a single element | |
119 # "position" the position of an element into the chain according to a | |
120 # particular coordinate system (e.g. counting from the start) | |
121 # "value" what is stored in a single element | |
122 | |
123 use Carp qw(croak cluck carp); # as of 2.3 | |
124 use strict; # as of 2.27 | |
125 use integer; # WARNING: this is to increase performance | |
126 # a little bit of attention has to be given if float need to | |
127 # be stored as elements of the array | |
128 # the use of this "integer" affects all operations but not | |
129 # assignments. So float CAN be assigned as elements of the chain | |
130 # BUT, if you assign $z=-1.8;, $z will be equal to -1 because | |
131 # "-" counts as a unary operation! | |
132 | |
133 =head2 _updown_chain2string | |
134 | |
135 Title : chain2string | |
136 Usage : $string = Bio::LiveSeq::Chain::chain2string("down",$chain,6,9) | |
137 Function: reads the contents of the chain, outputting a string | |
138 Returns : a string | |
139 Examples: | |
140 : down_chain2string($chain) -> all the chain from begin to end | |
141 : down_chain2string($chain,6) -> from 6 to the end | |
142 : down_chain2string($chain,6,4) -> from 6, going on 4 elements | |
143 : down_chain2string($chain,6,"",10) -> from 6 to 10 | |
144 : up_chain2string($chain,10,"",6) -> from 10 to 6 upstream | |
145 Defaults: start=first element; if len undef, goes to last | |
146 if last undef, goes to end | |
147 if last defined, it overrides len (undefining it) | |
148 Error code: -1 | |
149 Args : "up"||"down" as first argument to specify the reading direction | |
150 reference (to the chain) | |
151 [first] [len] [last] optional integer arguments to specify how | |
152 much and from (and to) where to read | |
153 | |
154 =cut | |
155 | |
156 # methods rewritten 2.61 | |
157 sub up_chain2string { | |
158 _updown_chain2string("up",@_); | |
159 } | |
160 sub down_chain2string { | |
161 _updown_chain2string("down",@_); | |
162 } | |
163 | |
164 sub _updown_chain2string { | |
165 my ($direction,$chain,$first,$len,$last)=@_; | |
166 unless($chain) { cluck "no chain input"; return (-1); } | |
167 my $begin=$chain->{'begin'}; # the label of the BEGIN element | |
168 my $end=$chain->{'end'}; # the label of the END element | |
169 my $flow; | |
170 | |
171 if ($direction eq "up") { | |
172 $flow=2; # used to determine the direction of chain navigation | |
173 unless ($first) { $first=$end; } # if undef or 0, use $end | |
174 } else { # defaults to "down" | |
175 $flow=1; # used to determine the direction of chain navigation | |
176 unless ($first) { $first=$begin; } # if undef or 0, use $begin | |
177 } | |
178 | |
179 unless($chain->{$first}) { | |
180 cluck "label for first not defined"; return (-1); } | |
181 if ($last) { # if last is defined, it gets priority and len is not used | |
182 unless($chain->{$last}) { | |
183 cluck "label for last not defined"; return (-1); } | |
184 if ($len) { | |
185 warn "Warning chain2string: argument LAST:$last overriding LEN:$len!"; | |
186 undef $len; | |
187 } | |
188 } else { | |
189 if ($direction eq "up") { | |
190 $last=$begin; # if last not defined, go 'till begin (or upto len elements) | |
191 } else { | |
192 $last=$end; # if last not defined, go 'till end (or upto len elements) | |
193 } | |
194 } | |
195 | |
196 my ($string,@array); | |
197 my $label=$first; my $i=1; | |
198 my $afterlast=$chain->{$last}[$flow]; # if last=end, afterlast is undef | |
199 unless (defined $afterlast) { $afterlast=0; } # keep strict happy | |
200 | |
201 # proceed for len elements or until last, whichever comes first | |
202 # if $len undef goes till end | |
203 while (($label) && ($label != $afterlast) && ($i <= ($len || $i + 1))) { | |
204 @array=@{$chain->{$label}}; | |
205 $string .= $array[0]; | |
206 $label = $array[$flow]; | |
207 $i++; | |
208 } | |
209 return ($string); # if chain is interrupted $string won't be complete | |
210 } | |
211 | |
212 =head2 _updown_labels | |
213 | |
214 Title : labels | |
215 Usage : @labels = Bio::LiveSeq::Chain::_updown_labels("down",$chain,4,16) | |
216 Function: returns all the labels in a chain or those between two | |
217 specified ones (termed "first" and "last") | |
218 Returns : a reference to an array containing the labels | |
219 Args : "up"||"down" as first argument to specify the reading direction | |
220 reference (to the chain) | |
221 [first] [last] (integer for the starting and eneding labels) | |
222 | |
223 =cut | |
224 | |
225 | |
226 # arguments: CHAIN_REF [FIRSTLABEL] [LASTLABEL] | |
227 # returns: reference to array containing the labels | |
228 sub down_labels { | |
229 my ($chain,$first,$last)=@_; | |
230 _updown_labels("down",$chain,$first,$last); | |
231 } | |
232 sub up_labels { | |
233 my ($chain,$first,$last)=@_; | |
234 _updown_labels("up",$chain,$first,$last); | |
235 } | |
236 # arguments: "up"||"down" CHAIN_REF [FIRSTLABEL] [LASTLABEL] | |
237 # returns: reference to array containing the labels | |
238 sub _updown_labels { | |
239 my ($direction,$chain,$first,$last)=@_; | |
240 unless($chain) { cluck "no chain input"; return (0); } | |
241 my $begin=$chain->{'begin'}; # the label of the BEGIN element | |
242 my $end=$chain->{'end'}; # the label of the END element | |
243 my $flow; | |
244 if ($direction eq "up") { $flow=2; | |
245 unless ($first) { $first=$end; } | |
246 unless ($last) { $last=$begin; } | |
247 } else { $flow=1; | |
248 unless ($last) { $last=$end; } | |
249 unless ($first) { $first=$begin; } | |
250 } | |
251 unless($chain->{$first}) { warn "not existing label $first"; return (0); } | |
252 unless($chain->{$last}) { warn "not existing label $last"; return (0); } | |
253 | |
254 my $label=$first; my @labels; | |
255 my $afterlast=$chain->{$last}[$flow]; # if last=end, afterlast is undef | |
256 unless (defined $afterlast) { $afterlast=0; } # keep strict happy | |
257 | |
258 while (($label)&&($label != $afterlast)) { | |
259 push(@labels,$label); | |
260 $label=$chain->{$label}[$flow]; | |
261 } | |
262 return (\@labels); # if chain is interrupted @labels won't be complete | |
263 } | |
264 | |
265 | |
266 =head2 start | |
267 | |
268 Title : start | |
269 Usage : $start = Bio::LiveSeq::Chain::start() | |
270 Returns : the label marking the start of the chain | |
271 Errorcode: -1 | |
272 Args : none | |
273 | |
274 =cut | |
275 | |
276 sub start { | |
277 my $chain=$_[0]; | |
278 unless($chain) { cluck "no chain input"; return (-1); } | |
279 return ($chain->{'begin'}); | |
280 } | |
281 | |
282 =head2 end | |
283 | |
284 Title : end | |
285 Usage : $end = Bio::LiveSeq::Chain::end() | |
286 Returns : the label marking the end of the chain | |
287 Errorcode: -1 | |
288 Args : none | |
289 | |
290 =cut | |
291 | |
292 sub end { | |
293 my $chain=$_[0]; | |
294 unless($chain) { cluck "no chain input"; return (-1); } | |
295 return ($chain->{'end'}); | |
296 } | |
297 | |
298 =head2 label_exists | |
299 | |
300 Title : label_exists | |
301 Usage : $check = Bio::LiveSeq::Chain::label_exists($chain,$label) | |
302 Function: It checks if a label is defined, i.e. if an element is there or | |
303 is not there anymore | |
304 Returns : 1 if the label exists, 0 if it is not there, -1 error | |
305 Errorcode: -1 | |
306 Args : reference to the chain, integer | |
307 | |
308 =cut | |
309 | |
310 sub label_exists { | |
311 my ($chain,$label)=@_; | |
312 unless($chain) { cluck "no chain input"; return (-1); } | |
313 if ($label && $chain->{$label}) { return (1); } else { return (0) }; | |
314 } | |
315 | |
316 | |
317 =head2 down_get_pos_of_label | |
318 | |
319 Title : down_get_pos_of_label | |
320 Usage : $position = Bio::LiveSeq::Chain::down_get_pos_of_label($chain,$label,$first) | |
321 Function: returns the position of $label counting from $first, i.e. taking | |
322 $first as 1 of coordinate system. If $first is not specified it will | |
323 count from the start of the chain. | |
324 Returns : | |
325 Errorcode: 0 | |
326 Args : reference to the chain, integer (the label of interest) | |
327 optional: integer (a different label that will be taken as the | |
328 first one, i.e. the one to count from) | |
329 Note: It counts "downstream". To proceed backward use up_get_pos_of_label | |
330 | |
331 =cut | |
332 | |
333 sub down_get_pos_of_label { | |
334 #down_chain2string($_[0],$_[2],undef,$_[1],"counting"); | |
335 my ($chain,$label,$first)=@_; | |
336 _updown_count("down",$chain,$first,$label); | |
337 } | |
338 sub up_get_pos_of_label { | |
339 #up_chain2string($_[0],$_[2],undef,$_[1],"counting"); | |
340 my ($chain,$label,$first)=@_; | |
341 _updown_count("up",$chain,$first,$label); | |
342 } | |
343 | |
344 =head2 down_subchain_length | |
345 | |
346 Title : down_subchain_length | |
347 Usage : $length = Bio::LiveSeq::Chain::down_subchain_length($chain,$first,$last) | |
348 Function: returns the length of the chain between the labels "first" and "last", included | |
349 Returns : integer | |
350 Errorcode: 0 | |
351 Args : reference to the chain, integer, integer | |
352 Note: It counts "downstream". To proceed backward use up_subchain_length | |
353 | |
354 =cut | |
355 | |
356 # arguments: chain_ref [first] [last] | |
357 # returns the length of the chain between first and last (included) | |
358 sub down_subchain_length { | |
359 #down_chain2string($_[0],$_[1],undef,$_[2],"counting"); | |
360 my ($chain,$first,$last)=@_; | |
361 _updown_count("down",$chain,$first,$last); | |
362 } | |
363 sub up_subchain_length { | |
364 #up_chain2string($_[0],$_[1],undef,$_[2],"counting"); | |
365 my ($chain,$first,$last)=@_; | |
366 _updown_count("up",$chain,$first,$last); | |
367 } | |
368 | |
369 # arguments: DIRECTION CHAIN_REF FIRSTLABEL LASTLABEL | |
370 # errorcode 0 | |
371 sub _updown_count { | |
372 my ($direction,$chain,$first,$last)=@_; | |
373 unless($chain) { cluck "no chain input"; return (0); } | |
374 my $begin=$chain->{'begin'}; # the label of the BEGIN element | |
375 my $end=$chain->{'end'}; # the label of the END element | |
376 my $flow; | |
377 if ($direction eq "up") { $flow=2; | |
378 unless ($first) { $first=$end; } | |
379 unless ($last) { $last=$begin; } | |
380 } else { $flow=1; | |
381 unless ($last) { $last=$end; } | |
382 unless ($first) { $first=$begin; } | |
383 } | |
384 unless($chain->{$first}) { warn "not existing label $first"; return (0); } | |
385 unless($chain->{$last}) { warn "not existing label $last"; return (0); } | |
386 | |
387 my $label=$first; my $count; | |
388 my $afterlast=$chain->{$last}[$flow]; # if last=end, afterlast is undef | |
389 unless (defined $afterlast) { $afterlast=0; } # keep strict happy | |
390 | |
391 while (($label)&&($label != $afterlast)) { | |
392 $count++; | |
393 $label=$chain->{$label}[$flow]; | |
394 } | |
395 return ($count); # if chain is interrupted, $i will be up to the breaking point | |
396 } | |
397 | |
398 =head2 invert_chain | |
399 | |
400 Title : invert_chain | |
401 Usage : $errorcode=Bio::LiveSeq::Chain::invert_chain($chain) | |
402 Function: completely inverts the order of the chain elements; begin is swapped with end and all links updated (PREV&NEXT fields swapped) | |
403 Returns : 1 if all OK, 0 if errors | |
404 Errorcode: 0 | |
405 Args : reference to the chain | |
406 | |
407 =cut | |
408 | |
409 sub invert_chain { | |
410 my $chain=$_[0]; | |
411 unless($chain) { cluck "no chain input"; return (0); } | |
412 my $begin=$chain->{'begin'}; # the name of the first element | |
413 my $end=$chain->{'end'}; # the name of the last element | |
414 my ($label,@array); | |
415 $label=$begin; # starts from the beginning | |
416 while ($label) { # proceed with linked elements, swapping PREV and NEXT | |
417 @array=@{$chain->{$label}}; | |
418 ($chain->{$label}[1],$chain->{$label}[2])=($array[2],$array[1]); # swap | |
419 $label = $array[1]; # go to the next one | |
420 } | |
421 # now swap begin and end fields | |
422 ($chain->{'begin'},$chain->{'end'})=($end,$begin); | |
423 return (1); # that's it | |
424 } | |
425 | |
426 # warning that method has changed name | |
427 #sub mutate_element { | |
428 #croak "Warning: old method name. Please update code to 'set_value_at_label'\n"; | |
429 # &set_value_at_label; | |
430 #} | |
431 | |
432 =head2 down_get_value_at_pos | |
433 | |
434 Title : down_get_value_at_pos | |
435 Usage : $value = Bio::LiveSeq::Chain::down_get_value_at_pos($chain,$position,$first) | |
436 Function: used to access the value of the chain at a particular position instead than directly with a label pointer. It will count the position from the start of the chain or from the label $first, if $first is specified | |
437 Returns : whatever is stored in the element of the chain | |
438 Errorcode: 0 | |
439 Args : reference to the chain, integer, [integer] | |
440 Note: It works "downstream". To proceed backward use up_get_value_at_pos | |
441 | |
442 =cut | |
443 | |
444 #sub get_value_at_pos { | |
445 #croak "Please use instead: down_get_value_at_pos"; | |
446 ##&down_get_value_at_pos; | |
447 #} | |
448 sub down_get_value_at_pos { | |
449 my ($chain,$position,$first)=@_; | |
450 my $label=down_get_label_at_pos($chain,$position,$first); | |
451 # check place of change | |
452 if (($label eq -1)||($label eq 0)) { # complain if label doesn't exist | |
453 warn "not existing element $label"; return (0); } | |
454 return _get_value($chain,$label); | |
455 } | |
456 sub up_get_value_at_pos { | |
457 my ($chain,$position,$first)=@_; | |
458 my $label=up_get_label_at_pos($chain,$position,$first); | |
459 # check place of change | |
460 if (($label eq -1)||($label eq 0)) { # complain if label doesn't exist | |
461 warn "not existing element $label"; return (0); } | |
462 return _get_value($chain,$label); | |
463 } | |
464 | |
465 =head2 down_set_value_at_pos | |
466 | |
467 Title : down_set_value_at_pos | |
468 Usage : $errorcode = Bio::LiveSeq::Chain::down_set_value_at_pos($chain,$newvalue,$position,$first) | |
469 Function: used to store a new value inside an element of the chain at a particular position instead than directly with a label pointer. It will count the position from the start of the chain or from the label $first, if $first is specified | |
470 Returns : 1 | |
471 Errorcode: 0 | |
472 Args : reference to the chain, newvalue, integer, [integer] | |
473 (newvalue can be: integer, string, object reference, hash ref) | |
474 Note: It works "downstream". To proceed backward use up_set_value_at_pos | |
475 Note2: If the $newvalue is undef, it will delete the contents of the | |
476 element but it won't remove the element from the chain. | |
477 | |
478 =cut | |
479 | |
480 #sub set_value_at_pos { | |
481 #croak "Please use instead: down_set_value_at_pos"; | |
482 ##&down_set_value_at_pos; | |
483 #} | |
484 sub down_set_value_at_pos { | |
485 my ($chain,$value,$position,$first)=@_; | |
486 my $label=down_get_label_at_pos($chain,$position,$first); | |
487 # check place of change | |
488 if (($label eq -1)||($label eq 0)) { # complain if label doesn't exist | |
489 warn "not existing element $label"; return (0); } | |
490 _set_value($chain,$label,$value); | |
491 return (1); | |
492 } | |
493 sub up_set_value_at_pos { | |
494 my ($chain,$value,$position,$first)=@_; | |
495 my $label=up_get_label_at_pos($chain,$position,$first); | |
496 # check place of change | |
497 if (($label eq -1)||($label eq 0)) { # complain if label doesn't exist | |
498 warn "not existing element $label"; return (0); } | |
499 _set_value($chain,$label,$value); | |
500 return (1); | |
501 } | |
502 | |
503 | |
504 =head2 down_set_value_at_label | |
505 | |
506 Title : down_set_value_at_label | |
507 Usage : $errorcode = Bio::LiveSeq::Chain::down_set_value_at_label($chain,$newvalue,$label) | |
508 Function: used to store a new value inside an element of the chain defined by its label. | |
509 Returns : 1 | |
510 Errorcode: 0 | |
511 Args : reference to the chain, newvalue, integer | |
512 (newvalue can be: integer, string, object reference, hash ref) | |
513 Note: It works "downstream". To proceed backward use up_set_value_at_label | |
514 Note2: If the $newvalue is undef, it will delete the contents of the | |
515 element but it won't remove the element from the chain. | |
516 | |
517 =cut | |
518 | |
519 sub set_value_at_label { | |
520 my ($chain,$value,$label)=@_; | |
521 unless($chain) { cluck "no chain input"; return (0); } | |
522 | |
523 # check place of change | |
524 unless($chain->{$label}) { # complain if label doesn't exist | |
525 warn "not existing element $label"; return (0); } | |
526 _set_value($chain,$label,$value); | |
527 return (1); | |
528 } | |
529 | |
530 =head2 down_get_value_at_label | |
531 | |
532 Title : down_get_value_at_label | |
533 Usage : $value = Bio::LiveSeq::Chain::down_get_value_at_label($chain,$label) | |
534 Function: used to access the value of the chain from one element defined by its label. | |
535 Returns : whatever is stored in the element of the chain | |
536 Errorcode: 0 | |
537 Args : reference to the chain, integer | |
538 Note: It works "downstream". To proceed backward use up_get_value_at_label | |
539 | |
540 =cut | |
541 | |
542 sub get_value_at_label { | |
543 my $chain=$_[0]; | |
544 unless($chain) { cluck "no chain input"; return (0); } | |
545 my $label = $_[1]; # the name of the element | |
546 | |
547 # check place of change | |
548 unless($chain->{$label}) { # complain if label doesn't exist | |
549 warn "not existing label $label"; return (0); } | |
550 return _get_value($chain,$label); | |
551 } | |
552 | |
553 # arguments: CHAIN_REF LABEL VALUE | |
554 sub _set_value { | |
555 my ($chain,$label,$value)=@_; | |
556 $chain->{$label}[0]=$value; | |
557 } | |
558 # arguments: CHAIN_REF LABEL | |
559 sub _get_value { | |
560 my ($chain,$label)=@_; | |
561 return $chain->{$label}[0]; | |
562 } | |
563 | |
564 =head2 down_get_label_at_pos | |
565 | |
566 Title : down_get_label_at_pos | |
567 Usage : $label = Bio::LiveSeq::Chain::down_get_label_at_pos($chain,$position,$first) | |
568 Function: used to retrieve the label of an an element of the chain at a particular position. It will count the position from the start of the chain or from the label $first, if $first is specified | |
569 Returns : integer | |
570 Errorcode: 0 | |
571 Args : reference to the chain, integer, [integer] | |
572 Note: It works "downstream". To proceed backward use up_get_label_at_pos | |
573 | |
574 =cut | |
575 | |
576 # arguments: CHAIN_REF POSITION [FIRST] | |
577 # returns: LABEL of element found counting from FIRST | |
578 sub down_get_label_at_pos { | |
579 _updown_get_label_at_pos("down",@_); | |
580 } | |
581 sub up_get_label_at_pos { | |
582 _updown_get_label_at_pos("up",@_); | |
583 } | |
584 | |
585 # arguments: [DIRECTION] CHAIN_REF POSITION [FIRST] | |
586 # Default DIRECTION="down" | |
587 # if FIRST is undefined, FIRST=START (if DIRECTION=down) or FIRST=END (up) | |
588 | |
589 sub _updown_get_label_at_pos { | |
590 my ($direction,$chain,$position,$first)=@_; | |
591 unless($chain) { cluck "no chain input"; return (0); } | |
592 my $begin=$chain->{'begin'}; # the label of the BEGIN element | |
593 my $end=$chain->{'end'}; # the label of the END element | |
594 my $flow; | |
595 if ($direction eq "up") { $flow=2; unless ($first) { $first=$end; } | |
596 } else { $flow=1; unless ($first) { $first=$begin; } } | |
597 unless($chain->{$first}) { warn "not existing label $first"; return (0); } | |
598 | |
599 my $label=$first; | |
600 my $i=1; | |
601 while ($i < $position) { | |
602 $label=$chain->{$label}[$flow]; | |
603 $i++; | |
604 unless ($label) { return (0); } # chain ended before position reached | |
605 } | |
606 return ($label); | |
607 } | |
608 | |
609 # for english_concerned, latin_unconcerned people | |
610 sub preinsert_string { &praeinsert_string } | |
611 sub preinsert_array { &praeinsert_array } | |
612 | |
613 # praeinsert_string CHAIN_REF STRING [POSITION] | |
614 # the chars of STRING are passed to praeinsert_array | |
615 # the chars are inserted in CHAIN, before POSITION | |
616 # if POSITION is undef, default is to prepend the string to the beginning | |
617 # i.e. POSITION is START of CHAIN | |
618 sub praeinsert_string { | |
619 my @string=split(//,$_[1]); | |
620 praeinsert_array($_[0],\@string,$_[2]); | |
621 } | |
622 | |
623 # postinsert_string CHAIN_REF STRING [POSITION] | |
624 # the chars of STRING are passed to postinsert_array | |
625 # the chars are inserted in CHAIN, after POSITION | |
626 # if POSITION is undef, default is to append the string to the end | |
627 # i.e. POSITION is END of CHAIN | |
628 sub postinsert_string { | |
629 my @string=split(//,$_[1]); | |
630 postinsert_array($_[0],\@string,$_[2]); | |
631 } | |
632 | |
633 # praeinsert_array CHAIN_REF ARRAY_REF [POSITION] | |
634 # the elements of ARRAY are inserted in CHAIN, before POSITION | |
635 # if POSITION is undef, default is to prepend the elements to the beginning | |
636 # i.e. POSITION is START of CHAIN | |
637 sub praeinsert_array { | |
638 _praepostinsert_array($_[0],"prae",$_[1],$_[2]); | |
639 } | |
640 | |
641 # postinsert_array CHAIN_REF ARRAY_REF [POSITION] | |
642 # the elements of ARRAY are inserted in CHAIN, after POSITION | |
643 # if POSITION is undef, default is to append the elements to the end | |
644 # i.e. POSITION is END of CHAIN | |
645 sub postinsert_array { | |
646 _praepostinsert_array($_[0],"post",$_[1],$_[2]); | |
647 } | |
648 | |
649 | |
650 =head2 _praepostinsert_array | |
651 | |
652 Title : _praepostinsert_array | |
653 Usage : ($insbegin,$insend) = Bio::LiveSeq::Chain::_praepostinsert_array($chainref,"post",$arrayref,$position) | |
654 Function: the elements of the array specified by $arrayref are inserted (creating a new subchain) in the chain specified by $chainref, before or after (depending on the "prae"||"post" keyword passed as second argument) the specified position. | |
655 Returns : two labels: the first and the last of the inserted subchain | |
656 Defaults: if no position is specified, the new chain will be inserted after | |
657 (post) the first element of the chain | |
658 Errorcode: 0 | |
659 Args : chainref, "prae"||"post", arrayref, integer (position) | |
660 | |
661 =cut | |
662 | |
663 # returns: 0 if errors, otherwise returns references of begin and end of | |
664 # the insertion | |
665 sub _praepostinsert_array { | |
666 my $chain=$_[0]; | |
667 unless($chain) { cluck "no chain input"; return (0); } | |
668 my $praepost=$_[1] || "post"; # defaults to post | |
669 my ($prae,$post); | |
670 my $position=$_[3]; | |
671 my $begin=$chain->{'begin'}; # the name of the first element of the chain | |
672 my $end=$chain->{'end'}; # the name of the the last element of the chain | |
673 # check if prae or post insertion and prepare accordingly | |
674 if ($praepost eq "prae") { | |
675 $prae=1; | |
676 unless (($position eq 0)||($position)) { $position=$begin; } # if undef, use $begin | |
677 } else { | |
678 $post=1; | |
679 unless (($position eq 0)||($position)) { $position=$end; } # if undef, use $end | |
680 } | |
681 # check place of insertion | |
682 unless($chain->{$position}) { # complain if position doesn't exist | |
683 warn ("Warning _praepostinsert_array: not existing element $position"); | |
684 return (0); | |
685 } | |
686 | |
687 # check if there are elements to insert | |
688 my $elements=$_[2]; # reference to the array containing the new elements | |
689 my $elements_count=scalar(@{$elements}); | |
690 unless ($elements_count) { | |
691 warn ("Warning _praepostinsert_array: no elements input"); return (0); } | |
692 | |
693 # create new chainelements with offset=firstfree(chain) | |
694 my ($insertbegin,$insertend)=_create_chain_elements($chain,$elements); | |
695 | |
696 # DEBUGGING | |
697 #print "Executing ${praepost}insertion of $elements_count elements ('@{$elements}') at position: $position\n"; | |
698 | |
699 # attach the new chain to the old chain | |
700 # 4 cases: prae@begin, prae@middle, post@middle, post@end | |
701 # NOTE: in case of double joinings always join wisely so not to | |
702 # delete the PREV/NEXT attribute before it is needed | |
703 my $noerror=1; | |
704 if ($prae) { | |
705 if ($position==$begin) { # 1st case: prae@begin | |
706 $noerror=_join_chain_elements($chain,$insertend,$begin); | |
707 $chain->{'begin'}=$insertbegin; | |
708 } else { # 2nd case: prae@middle | |
709 $noerror=_join_chain_elements($chain,up_element($chain,$position),$insertbegin); | |
710 $noerror=_join_chain_elements($chain,$insertend,$position); | |
711 } | |
712 } elsif ($post) { | |
713 if ($position==$end) { # 4th case: post@end | |
714 $noerror=_join_chain_elements($chain,$end,$insertbegin); | |
715 $chain->{'end'}=$insertend; | |
716 } else { # 3rd case: post@middle # note the order of joins (important) | |
717 $noerror=_join_chain_elements($chain,$insertend,down_element($chain,$position)); | |
718 $noerror=_join_chain_elements($chain,$position,$insertbegin); | |
719 } | |
720 } else { # this should never happen | |
721 die "_praepostinsert_array: Something went very wrong"; | |
722 } | |
723 | |
724 # check for errors and return begin,end of insertion | |
725 if ($noerror) { | |
726 return ($insertbegin,$insertend); | |
727 } else { # something went wrong with the joinings | |
728 warn "Warning _praepostinsert_array: Joining of insertion failed"; | |
729 return (0); | |
730 } | |
731 } | |
732 | |
733 # create new chain elements with offset=firstfree | |
734 # arguments: CHAIN_REF ARRAY_REF | |
735 # returns: pointers to BEGIN and END of new chained elements created | |
736 # returns 0 if error(s) encountered | |
737 sub _create_chain_elements { | |
738 my $chain=$_[0]; | |
739 unless($chain) { | |
740 warn ("Warning _create_chain_elements: no chain input"); return (0); } | |
741 my $arrayref=$_[1]; | |
742 my $array_count=scalar(@{$arrayref}); | |
743 unless ($array_count) { | |
744 warn ("Warning _create_chain_elements: no elements input"); return (0); } | |
745 my $begin=$chain->{'firstfree'}; | |
746 my $i=$begin-1; | |
747 my $element; | |
748 foreach $element (@{$arrayref}) { | |
749 $i++; | |
750 $chain->{$i}=[$element,$i+1,$i-1]; | |
751 } | |
752 my $end=$i; | |
753 $chain->{'firstfree'}=$i+1; # what a new added element should be called | |
754 $chain->{'size'} += $end-$begin+1; # increase size of chain | |
755 # leave sticky edges (to be joined by whoever called this subroutine) | |
756 $chain->{$begin}[2]=undef; | |
757 $chain->{$end}[1]=undef; | |
758 return ($begin,$end); # return pointers to first and last of the newelements | |
759 } | |
760 | |
761 # argument: CHAIN_REF ELEMENT | |
762 # returns: name of DOWN/NEXT element (the downstream one) | |
763 # returns -1 if error encountered (e.g. chain or elements undefined) | |
764 # returns 0 if there's no DOWN element | |
765 sub down_element { | |
766 _updown_element("down",@_); | |
767 } | |
768 # argument: CHAIN_REF ELEMENT | |
769 # returns: name of UP/PREV element (the upstream one) | |
770 # returns -1 if error encountered (e.g. chain or elements undefined) | |
771 # returns 0 if there's no UP element | |
772 sub up_element { | |
773 _updown_element("up",@_); | |
774 } | |
775 | |
776 # used by both is_up_element and down_element | |
777 sub _updown_element { | |
778 my $direction=$_[0] || "down"; # defaults to downstream | |
779 my $flow; | |
780 if ($direction eq "up") { | |
781 $flow=2; # used to determine the direction of chain navigation | |
782 } else { | |
783 $flow=1; # used to determine the direction of chain navigation | |
784 } | |
785 my $chain=$_[1]; | |
786 unless($chain) { | |
787 warn ("Warning ${direction}_element: no chain input"); return (-1); } | |
788 my $me = $_[2]; # the name of the element | |
789 my $it = $chain->{$me}[$flow]; # the prev||next one, upstream||downstream | |
790 if ($it) { | |
791 return ($it); # return the name of prev||next element | |
792 } else { | |
793 return (0); # there is no prev||next element ($it is undef) | |
794 } | |
795 } | |
796 | |
797 # used by both is_downstream and is_upstream | |
798 sub _is_updownstream { | |
799 my $direction=$_[0] || "down"; # defaults to downstream | |
800 my $flow; | |
801 if ($direction eq "up") { | |
802 $flow=2; # used to determine the direction of chain navigation | |
803 } else { | |
804 $flow=1; # used to determine the direction of chain navigation | |
805 } | |
806 my $chain=$_[1]; | |
807 unless($chain) { | |
808 warn ("Warning is_${direction}stream: no chain input"); return (-1); } | |
809 my $first=$_[2]; # the name of the first element | |
810 my $second=$_[3]; # the name of the first element | |
811 if ($first==$second) { | |
812 warn ("Warning is_${direction}stream: first==second!!"); return (0); } | |
813 unless($chain->{$first}) { | |
814 warn ("Warning is_${direction}stream: first element not defined"); return (-1); } | |
815 unless($chain->{$second}) { | |
816 warn ("Warning is_${direction}stream: second element not defined"); return (-1); } | |
817 my ($label,@array); | |
818 $label=$first; | |
819 my $found=0; | |
820 while (($label)&&(!($found))) { # searches till the end or till found | |
821 if ($label==$second) { | |
822 $found=1; | |
823 } | |
824 @array=@{$chain->{$label}}; | |
825 $label = $array[$flow]; # go to the prev||next one, upstream||downstream | |
826 } | |
827 return $found; | |
828 } | |
829 | |
830 =head2 is_downstream | |
831 | |
832 Title : is_downstream | |
833 Usage : Bio::LiveSeq::Chain::is_downstream($chainref,$firstlabel,$secondlabel) | |
834 Function: checks if SECONDlabel follows FIRSTlabel | |
835 It runs downstream the elements of the chain from FIRST searching | |
836 for SECOND. | |
837 Returns : 1 if SECOND is found /after/ FIRST; 0 otherwise (i.e. if it | |
838 reaches the end of the chain without having found it) | |
839 Errorcode -1 | |
840 Args : two labels (integer) | |
841 | |
842 =cut | |
843 | |
844 sub is_downstream { | |
845 _is_updownstream("down",@_); | |
846 } | |
847 | |
848 =head2 is_upstream | |
849 | |
850 Title : is_upstream | |
851 Usage : Bio::LiveSeq::Chain::is_upstream($chainref,$firstlabel,$secondlabel) | |
852 Function: checks if SECONDlabel follows FIRSTlabel | |
853 It runs upstream the elements of the chain from FIRST searching | |
854 for SECOND. | |
855 Returns : 1 if SECOND is found /after/ FIRST; 0 otherwise (i.e. if it | |
856 reaches the end of the chain without having found it) | |
857 Errorcode -1 | |
858 Args : two labels (integer) | |
859 | |
860 =cut | |
861 | |
862 sub is_upstream { | |
863 _is_updownstream("up",@_); | |
864 } | |
865 | |
866 =head2 check_chain | |
867 | |
868 Title : check_chain | |
869 Usage : @errorcodes = Bio::LiveSeq::Chain::check_chain() | |
870 Function: a wraparound to a series of check for consistency of the chain | |
871 It will check for boundaries, size, backlinking and forwardlinking | |
872 Returns : array of 4 warn codes, each can be 1 (all ok) or 0 (something wrong) | |
873 Errorcode: 0 | |
874 Args : none | |
875 Note : this is slow and through. It is not really needed. It is mostly | |
876 a code-developer tool. | |
877 | |
878 =cut | |
879 | |
880 sub check_chain { | |
881 my $chain=$_[0]; | |
882 unless($chain) { | |
883 warn ("Warning check_chain: no chain input"); return (-1); } | |
884 my ($warnbound,$warnsize,$warnbacklink,$warnforlink); | |
885 $warnbound=&_boundcheck; # passes on the arguments of the subroutine | |
886 $warnsize=&_sizecheck; | |
887 $warnbacklink=&_downlinkcheck; | |
888 $warnforlink=&_uplinkcheck; | |
889 return ($warnbound,$warnsize,$warnbacklink,$warnforlink); | |
890 } | |
891 | |
892 # consistency check for forwardlinks walking upstream | |
893 # argument: a chain reference | |
894 # returns: 1 all OK 0 problems | |
895 sub _uplinkcheck { | |
896 _updownlinkcheck("up",@_); | |
897 } | |
898 | |
899 # consistency check for backlinks walking downstream | |
900 # argument: a chain reference | |
901 # returns: 1 all OK 0 problems | |
902 sub _downlinkcheck { | |
903 _updownlinkcheck("down",@_); | |
904 } | |
905 | |
906 # consistency check for links, common to _uplinkcheck and _downlinkcheck | |
907 # argument: "up"||"down", check_ref | |
908 # returns: 1 all OK 0 problems | |
909 sub _updownlinkcheck { | |
910 my $direction=$_[0] || "down"; # defaults to downstream | |
911 my ($flow,$wolf); | |
912 my $chain=$_[1]; | |
913 unless($chain) { | |
914 warn ("Warning _${direction}linkcheck: no chain input"); return (0); } | |
915 my $begin=$chain->{'begin'}; # the name of the first element | |
916 my $end=$chain->{'end'}; # the name of the last element | |
917 my ($label,@array,$me,$it,$itpoints); | |
918 if ($direction eq "up") { | |
919 $flow=2; # used to determine the direction of chain navigation | |
920 $wolf=1; | |
921 $label=$end; # start from end | |
922 } else { | |
923 $flow=1; # used to determine the direction of chain navigation | |
924 $wolf=2; | |
925 $label=$begin; # start from beginning | |
926 } | |
927 my $warncode=1; | |
928 | |
929 while ($label) { # proceed with linked elements, checking neighbours | |
930 $me=$label; | |
931 @array=@{$chain->{$label}}; | |
932 $label = $array[$flow]; # go to the next one | |
933 $it=$label; | |
934 if ($it) { # no sense in checking if next one not defined (END element) | |
935 @array=@{$chain->{$label}}; | |
936 $itpoints=$array[$wolf]; | |
937 unless ($me==$itpoints) { | |
938 warn "Warning: ${direction}LinkCheck: LINK wrong in $it, that doesn't point back to me ($me). It points to $itpoints\n"; | |
939 $warncode=0; | |
940 } | |
941 } | |
942 } | |
943 return $warncode; | |
944 } | |
945 | |
946 # consistency check for size of chain | |
947 # argument: a chain reference | |
948 # returns: 1 all OK 0 wrong size | |
949 sub _sizecheck { | |
950 my $chain=$_[0]; | |
951 unless($chain) { | |
952 warn ("Warning _sizecheck: no chain input"); return (0); } | |
953 my $begin=$chain->{'begin'}; # the name of the first element | |
954 my $warncode=1; | |
955 my ($label,@array); | |
956 my $size=$chain->{'size'}; | |
957 my $count=0; | |
958 $label=$begin; | |
959 while ($label) { # proceed with linked elements, counting | |
960 @array=@{$chain->{$label}}; | |
961 $label = $array[1]; # go to the next one | |
962 $count++; | |
963 } | |
964 if ($size != $count) { | |
965 warn "Size check reports error: assumed size: $size, real size: $count "; | |
966 $warncode=0; | |
967 } | |
968 return $warncode; | |
969 } | |
970 | |
971 | |
972 # consistency check for begin and end (boundaries) | |
973 # argument: a chain reference | |
974 # returns: 1 all OK 0 problems | |
975 sub _boundcheck { | |
976 my $chain=$_[0]; | |
977 unless($chain) { | |
978 warn ("Warning _boundcheck: no chain input"); return (0); } | |
979 my $begin=$chain->{'begin'}; # the name of the first element | |
980 my $end=$chain->{'end'}; # the name of the (supposedly) last element | |
981 my $warncode=1; | |
982 | |
983 # check SYNC of beginning | |
984 if (($begin)&&($chain->{$begin})) { # if the BEGIN points to existing element | |
985 if ($chain->{$begin}[2]) { # if BEGIN element has PREV not undef | |
986 warn "Warning: BEGIN element has PREV field defined \n"; | |
987 warn "\tWDEBUG begin: $begin\t"; | |
988 warn "\tWDEBUG begin's PREV: $chain->{$begin}[2] \n"; | |
989 $warncode=0; | |
990 } | |
991 } else { | |
992 warn "Warning: BEGIN key of chain does not point to existing element!\n"; | |
993 warn "\tWDEBUG begin: $begin\n"; | |
994 $warncode=0; | |
995 } | |
996 # check SYNC of end | |
997 if (($end)&&($chain->{$end})) { # if the END points to an existing element | |
998 if ($chain->{$end}[1]) { # if END element has NEXT not undef | |
999 warn "Warning: END element has NEXT field defined \n"; | |
1000 warn "\tWDEBUG end: $end\t"; | |
1001 warn "\tWDEBUG end's NEXT: $chain->{$end}[1] \n"; | |
1002 $warncode=0; | |
1003 } | |
1004 } else { | |
1005 warn "Warning: END key of chain does not point to existing element!\n"; | |
1006 warn "\tWDEBUG end: $end\n"; | |
1007 $warncode=0; | |
1008 } | |
1009 return $warncode; | |
1010 } | |
1011 | |
1012 # arguments: chain_ref | |
1013 # returns: the size of the chain (the number of elements) | |
1014 # return code -1: unexistant chain, errors... | |
1015 sub chain_length { | |
1016 my $chain=$_[0]; | |
1017 unless($chain) { | |
1018 warn ("Warning chain_length: no chain input"); return (-1); } | |
1019 my $size=$chain->{'size'}; | |
1020 if ($size) { | |
1021 return ($size); | |
1022 } else { | |
1023 return (-1); | |
1024 } | |
1025 } | |
1026 | |
1027 # arguments: chain ref, first element name, second element name | |
1028 # returns: 1 or 0 (1 ok, 0 errors) | |
1029 sub _join_chain_elements { | |
1030 my $chain=$_[0]; | |
1031 unless($chain) { | |
1032 warn ("Warning _join_chain_elements: no chain input"); return (0); } | |
1033 my $leftelem=$_[1]; | |
1034 my $rightelem=$_[2]; | |
1035 unless(($leftelem)&&($rightelem)) { | |
1036 warn ("Warning _join_chain_elements: element arguments??"); return (0); } | |
1037 if (($chain->{$leftelem})&&($chain->{$rightelem})) { # if the elements exist | |
1038 $chain->{$leftelem}[1]=$rightelem; | |
1039 $chain->{$rightelem}[2]=$leftelem; | |
1040 return 1; | |
1041 } else { | |
1042 warn ("Warning _join_chain_elements: elements not defined"); | |
1043 return 0; | |
1044 } | |
1045 } | |
1046 | |
1047 =head2 splice_chain | |
1048 | |
1049 Title : splice_chain | |
1050 Usage : @errorcodes = Bio::LiveSeq::Chain::splice_chain($chainref,$first,$length,$last) | |
1051 Function: removes the elements designated by FIRST and LENGTH from a chain. | |
1052 The chain shrinks accordingly. If LENGTH is omitted, removes | |
1053 everything from FIRST onward. | |
1054 If END is specified, LENGTH is ignored and instead the removal | |
1055 occurs from FIRST to LAST. | |
1056 Returns : the elements removed as a string | |
1057 Errorcode: -1 | |
1058 Args : chainref, integer, integer, integer | |
1059 | |
1060 =cut | |
1061 | |
1062 sub splice_chain { | |
1063 my $chain=$_[0]; | |
1064 unless($chain) { | |
1065 warn ("Warning splice_chain: no chain input"); return (-1); } | |
1066 my $begin=$chain->{'begin'}; # the name of the first element | |
1067 my $end=$chain->{'end'}; # the name of the (supposedly) last element | |
1068 my $first=$_[1]; | |
1069 unless (($first eq 0)||($first)) { $first=$begin; } # if undef, use $begin | |
1070 my $len=$_[2]; | |
1071 my $last=$_[3]; | |
1072 my (@array, $string); | |
1073 my ($beforecut,$aftercut); | |
1074 | |
1075 unless($chain->{$first}) { | |
1076 warn ("Warning splice_chain: first element not defined"); return (-1); } | |
1077 if ($last) { # if last is defined, it gets priority and len is not used | |
1078 unless($chain->{$last}) { | |
1079 warn ("Warning splice_chain: last element not defined"); return (-1); } | |
1080 if ($len) { | |
1081 warn ("Warning splice_chain: argument LAST:$last overriding LEN:$len!"); | |
1082 undef $len; | |
1083 } | |
1084 } else { | |
1085 $last=$end; # if last not defined, go 'till end (or to len, whichever 1st) | |
1086 } | |
1087 | |
1088 $beforecut=$chain->{$first}[2]; # what's the element before 1st deleted? | |
1089 # if it is undef then it means we are splicing since the beginning | |
1090 | |
1091 my $i=1; | |
1092 my $label=$first; | |
1093 my $afterlast=$chain->{$last}[1]; # if $last=$end $afterlast should be undef | |
1094 unless (defined $afterlast) { $afterlast=0; } # keep strict happy | |
1095 | |
1096 # proceed for len elements or until the end, whichever comes first | |
1097 # if len undef goes till last | |
1098 while (($label)&&($label != $afterlast) && ($i <= ($len || $i + 1))) { | |
1099 @array=@{$chain->{$label}}; | |
1100 $string .= $array[0]; | |
1101 $aftercut = $array[1]; # what's the element next last deleted? | |
1102 # also used as savevar to change label posdeletion | |
1103 delete $chain->{$label}; # this can be deleted now | |
1104 $label=$aftercut; # label is updated using the savevar | |
1105 $i++; | |
1106 } | |
1107 | |
1108 # Now fix the chain (sticky edges, fields) | |
1109 # 4 cases: cut in the middle, cut from beginning, cut till end, cut all | |
1110 #print "\n\tstickyDEBUG beforecut: $beforecut "; # DEBUG | |
1111 #print "\taftercut: $aftercut \n"; # DEBUG | |
1112 if ($beforecut) { | |
1113 if ($aftercut) { # 1st case, middle cut | |
1114 _join_chain_elements($chain,$beforecut,$aftercut); | |
1115 } else { # 3rd case, end cut | |
1116 $chain->{'end'}=$beforecut; # update the END field | |
1117 $chain->{$beforecut}[1]=undef; # since we cut till the end | |
1118 } | |
1119 } else { | |
1120 if ($aftercut) { # 2nd case, begin cut | |
1121 $chain->{'begin'}=$aftercut; # update the BEGIN field | |
1122 $chain->{$aftercut}[2]=undef; # since we cut from beginning | |
1123 } else { # 4th case, all has been cut | |
1124 $chain->{'begin'}=undef; | |
1125 $chain->{'end'}=undef; | |
1126 } | |
1127 } | |
1128 $chain->{'size'}=($chain->{'size'}) - $i + 1; # update the SIZE field | |
1129 | |
1130 return $string; | |
1131 } | |
1132 | |
1133 | |
1134 # arguments: CHAIN_REF POSITION [FIRST] | |
1135 # returns: element counting POSITION from FIRST or from START if FIRST undef | |
1136 # i.e. returns the element at POSITION counting from FIRST | |
1137 #sub element_at_pos { | |
1138 #croak "Warning: old method name. Please update code to 'down_get_label_at_position'\n"; | |
1139 ##&down_element_at_pos; | |
1140 #} | |
1141 #sub up_element_at_pos { | |
1142 ## old wraparound | |
1143 ##my @array=up_chain2string($_[0],$_[2],$_[1],undef,"elements"); | |
1144 ##return $array[-1]; | |
1145 #croak "old method name. Update code to: up_get_label_at_position"; | |
1146 ##&up_get_label_at_pos; | |
1147 #} | |
1148 #sub down_element_at_pos { | |
1149 ## old wraparound | |
1150 ##my @array=down_chain2string($_[0],$_[2],$_[1],undef,"elements"); | |
1151 ##return $array[-1]; | |
1152 #croak "old method name. Update code to: down_get_label_at_position"; | |
1153 ##&down_get_label_at_pos; | |
1154 #} | |
1155 | |
1156 # arguments: CHAIN_REF ELEMENT [FIRST] | |
1157 # returns: the position of ELEMENT counting from FIRST or from START | |
1158 #i if FIRST is undef | |
1159 # i.e. returns the Number of elements between FIRST and ELEMENT | |
1160 # i.e. returns the position of element taking FIRST as 1 of coordinate system | |
1161 #sub pos_of_element { | |
1162 #croak ("Warning: old and ambiguous method name. Please update code to 'down_get_pos_of_label'\n"); | |
1163 ##&down_pos_of_element; | |
1164 #} | |
1165 #sub up_pos_of_element { | |
1166 #croak ("Warning: old method name. Please update code to 'up_get_pos_of_label'\n"); | |
1167 ##up_chain2string($_[0],$_[2],undef,$_[1],"counting"); | |
1168 #} | |
1169 #sub down_pos_of_element { | |
1170 #croak ("Warning: old method name. Please update code to 'down_get_pos_of_label'\n"); | |
1171 ##down_chain2string($_[0],$_[2],undef,$_[1],"counting"); | |
1172 #} | |
1173 | |
1174 # wraparounds to calculate length of subchain from first to last | |
1175 # arguments: chain_ref [first] [last] | |
1176 #sub subchain_length { | |
1177 #croak "Warning: old method name. Please update code to 'down_subchain_length'\n"; | |
1178 ##&down_subchain_length; | |
1179 #} | |
1180 | |
1181 # wraparounds to have elements output | |
1182 # same arguments as chain2string | |
1183 # returns label|name of every element | |
1184 #sub elements { | |
1185 #croak ("Warning: method no more supported. Please update code to 'down_labels' (NB: now it returns ref to array and doesn't allow length argument!)\n"); | |
1186 ##&down_elements; | |
1187 #} | |
1188 #sub up_elements { | |
1189 #croak ("Warning: method no more supported. Please update code to 'up_labels' (NB: now it returns ref to array and doesn't allow length argument!)\n"); | |
1190 ##up_chain2string($_[0],$_[1],$_[2],$_[3],"elements"); | |
1191 #} | |
1192 #sub down_elements { | |
1193 #croak ("Warning: method no more supported. Please update code to 'down_labels' (NB: now it returns ref to array and doesn't allow length argument!)\n"); | |
1194 ##down_chain2string($_[0],$_[1],$_[2],$_[3],"elements"); | |
1195 #} | |
1196 | |
1197 # wraparounds to have verbose output | |
1198 # same arguments as chain2string | |
1199 # returns the chain in a very verbose way | |
1200 sub chain2string_verbose { | |
1201 carp "Warning: method no more supported.\n"; | |
1202 &old_down_chain2string_verbose; | |
1203 } | |
1204 sub up_chain2string_verbose { | |
1205 carp "Warning: method no more supported.\n"; | |
1206 old_up_chain2string($_[0],$_[1],$_[2],$_[3],"verbose"); | |
1207 } | |
1208 sub down_chain2string_verbose { | |
1209 carp "Warning: method no more supported.\n"; | |
1210 old_down_chain2string($_[0],$_[1],$_[2],$_[3],"verbose"); | |
1211 } | |
1212 | |
1213 #sub chain2string { | |
1214 #croak ("Warning: old method name. Please update code to 'down_chain2string'\n"); | |
1215 ##&down_chain2string; | |
1216 #} | |
1217 sub old_up_chain2string { | |
1218 old_updown_chain2string("up",@_); | |
1219 } | |
1220 sub old_down_chain2string { | |
1221 old_updown_chain2string("down",@_); | |
1222 } | |
1223 | |
1224 # common to up_chain2string and down_chain2string | |
1225 # arguments: "up"||"down" chain_ref [first] [len] [last] [option] | |
1226 # [option] can be any of "verbose", "counting", "elements" | |
1227 # error: return -1 | |
1228 # defaults: start = first element; if len undef, goes to last | |
1229 # if last undef, goes to end | |
1230 # if last def it overrides len (that gets undef) | |
1231 # returns: a string | |
1232 # example usage: down_chain2string($chain) -> all the chain from begin to end | |
1233 # example usage: down_chain2string($chain,6) -> from 6 to the end | |
1234 # example usage: down_chain2string($chain,6,4) -> from 6, going on 4 elements | |
1235 # example usage: down_chain2string($chain,6,"",10) -> from 6 to 10 | |
1236 # example usage: up_chain2string($chain,10,"",6) -> from 10 to 6 upstream | |
1237 sub old_updown_chain2string { | |
1238 my ($direction,$chain,$first,$len,$last,$option)=@_; | |
1239 unless($chain) { | |
1240 warn ("Warning chain2string: no chain input"); return (-1); } | |
1241 my $begin=$chain->{'begin'}; # the name of the BEGIN element | |
1242 my $end=$chain->{'end'}; # the name of the END element | |
1243 my $flow; | |
1244 if ($direction eq "up") { | |
1245 $flow=2; # used to determine the direction of chain navigation | |
1246 unless ($first) { $first=$end; } # if undef or 0, use $end | |
1247 } else { # defaults to "down" | |
1248 $flow=1; # used to determine the direction of chain navigation | |
1249 unless ($first) { $first=$begin; } # if undef or 0, use $begin | |
1250 } | |
1251 | |
1252 unless($chain->{$first}) { | |
1253 warn ("Warning chain2string: first element not defined"); return (-1); } | |
1254 if ($last) { # if last is defined, it gets priority and len is not used | |
1255 unless($chain->{$last}) { | |
1256 warn ("Warning chain2string: last element not defined"); return (-1); } | |
1257 if ($len) { | |
1258 warn ("Warning chain2string: argument LAST:$last overriding LEN:$len!"); | |
1259 undef $len; | |
1260 } | |
1261 } else { | |
1262 if ($direction eq "up") { | |
1263 $last=$begin; # if last not defined, go 'till begin (or upto len elements) | |
1264 } else { | |
1265 $last=$end; # if last not defined, go 'till end (or upto len elements) | |
1266 } | |
1267 } | |
1268 my (@array, $string, $count); | |
1269 # call for verbosity (by way of chain2string_verbose); | |
1270 my $verbose=0; my $elements=0; my @elements; my $counting=0; | |
1271 if ($option) { # keep strict happy | |
1272 if ($option eq "verbose") { $verbose=1; } | |
1273 if ($option eq "elements") { $elements=1; } | |
1274 if ($option eq "counting") { $counting=1; } | |
1275 } | |
1276 | |
1277 if ($verbose) { | |
1278 print "BEGIN=$begin"; print " END=$end"; print " SIZE=$chain->{'size'}"; | |
1279 print " FIRSTFREE=$chain->{'firstfree'} \n"; | |
1280 } | |
1281 | |
1282 my $i=1; | |
1283 my $label=$first; | |
1284 my $afterlast=$chain->{$last}[$flow]; # if $last=$end $afterlast should be undef | |
1285 unless (defined $afterlast) { $afterlast=0; } # keep strict happy | |
1286 | |
1287 # proceed for len elements or until last, whichever comes first | |
1288 # if $len undef goes till end | |
1289 while (($label)&&($label != $afterlast) && ($i <= ($len || $i + 1))) { | |
1290 @array=@{$chain->{$label}}; | |
1291 if ($verbose) { | |
1292 $string .= "$array[2]_${label}_$array[1]=$array[0] "; | |
1293 $count++; | |
1294 } elsif ($elements) { | |
1295 push (@elements,$label); # returning element names/references/identifiers | |
1296 } elsif ($counting) { | |
1297 $count++; | |
1298 } else { | |
1299 $string .= $array[0]; # returning element content | |
1300 } | |
1301 $label = $array[$flow]; # go to next||prev i.e. downstream||upstream | |
1302 $i++; | |
1303 } | |
1304 #DEBUG#print "len: $len, first: $first, last: $last, afterlast=$afterlast \n"; | |
1305 if ($verbose) { print "TOTALprinted: $count\n"; } | |
1306 if ($counting) { | |
1307 return $count; | |
1308 } elsif ($elements) { | |
1309 return @elements; | |
1310 } else { | |
1311 return $string; | |
1312 } | |
1313 } | |
1314 | |
1315 # sub string2schain | |
1316 # --------> deleted, no more supported <-------- | |
1317 # creation of a single linked list/chain from a string | |
1318 # basically could be recreated by taking the *2chain methods and | |
1319 # omitting to set the 3rd field (label 2) containing the back links | |
1320 | |
1321 | |
1322 # creation of a double linked list/chain from a string | |
1323 # returns reference to a hash containing the chain | |
1324 # arguments: STRING [OFFSET] | |
1325 # defaults: OFFSET defaults to 1 if undef | |
1326 # the chain will contain as elements the single characters in the string | |
1327 sub string2chain { | |
1328 my @string=split(//,$_[0]); | |
1329 array2chain(\@string,$_[1]); | |
1330 } | |
1331 | |
1332 =head2 array2chain | |
1333 | |
1334 Title : array2chain | |
1335 Usage : $chainref = Bio::LiveSeq::Chain::array2chain($arrayref,$offset) | |
1336 Function: creation of a double linked chain from an array | |
1337 Returns : reference to a hash containing the chain | |
1338 Defaults: OFFSET defaults to 1 if undef | |
1339 Error code: 0 | |
1340 Args : a reference to an array containing the elements to be chainlinked | |
1341 an optional integer > 0 (this will be the starting count for | |
1342 the chain labels instead than having them begin from "1") | |
1343 | |
1344 =cut | |
1345 | |
1346 sub array2chain { | |
1347 my $arrayref=$_[0]; | |
1348 my $array_count=scalar(@{$arrayref}); | |
1349 unless ($array_count) { | |
1350 warn ("Warning array2chain: no elements input"); return (0); } | |
1351 my $begin=$_[1]; | |
1352 if (defined $begin) { | |
1353 if ($begin < 1) { | |
1354 warn "Warning array2chain: Zero or Negative offsets not allowed"; return (0); } | |
1355 } else { | |
1356 $begin=1; | |
1357 } | |
1358 my ($element,%hash); | |
1359 $hash{'begin'}=$begin; | |
1360 my $i=$begin-1; | |
1361 foreach $element (@{$arrayref}) { | |
1362 $i++; | |
1363 # hash with keys begin..end pointing to the arrays | |
1364 $hash{$i}=[$element,$i+1,$i-1]; | |
1365 } | |
1366 my $end=$i; | |
1367 $hash{'end'}=$end; | |
1368 $hash{firstfree}=$i+1; # what a new added element should be called | |
1369 $hash{size}=$end-$begin+1; # how many elements in the chain | |
1370 | |
1371 # eliminate pointers to unexisting elements | |
1372 $hash{$begin}[2]=undef; | |
1373 $hash{$end}[1]=undef; | |
1374 | |
1375 return (\%hash); | |
1376 } | |
1377 | |
1378 1; # returns 1 |