0
|
1 #!/usr/bin/perl
|
|
2 # $Id: Chain.pm,v 1.12 2001/06/18 08:27:53 heikki Exp $
|
|
3 #
|
|
4 # bioperl module for Bio::LiveSeq::Chain
|
|
5 #
|
|
6 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net>
|
|
7 #
|
|
8 # Copyright Joseph Insana
|
|
9 #
|
|
10 # You may distribute this module under the same terms as perl itself
|
|
11 #
|
|
12 # POD documentation - main docs before the code
|
|
13 #
|
|
14
|
|
15 =head1 NAME
|
|
16
|
|
17 Bio::LiveSeq::Chain - DoubleChain DataStructure for Perl
|
|
18
|
|
19 =head1 SYNOPSIS
|
|
20
|
|
21 #documentation needed
|
|
22
|
|
23 =head1 DESCRIPTION
|
|
24
|
|
25 This is a general purpose module (that's why it's not in object-oriented
|
|
26 form) that introduces a novel datastructure in PERL. It implements
|
|
27 the "double linked chain". The elements of the chain can contain basically
|
|
28 everything. From chars to strings, from object references to arrays or hashes.
|
|
29 It is used in the LiveSequence project to create a dynamical DNA sequence,
|
|
30 easier to manipulate and change. It's use is mainly for sequence variation
|
|
31 analysis but it could be used - for example - in e-cell projects.
|
|
32 The Chain module in itself doesn't have any biological bias, so can be
|
|
33 used for any programming purpose.
|
|
34
|
|
35 Each element of the chain (with the exclusion of the first and the last of the
|
|
36 chain) is connected to other two elements (the PREVious and the NEXT one).
|
|
37 There is no absolute position (like in an array), hence if positions are
|
|
38 important, they need to be computed (methods are provided).
|
|
39 Otherwise it's easy to keep track of the elements with their "LABELs".
|
|
40 There is one LABEL (think of it as a pointer) to each ELEMENT. The labels
|
|
41 won't change after insertions or deletions of the chain. So it's
|
|
42 always possible to retrieve an element even if the chain has been
|
|
43 modified by successive insertions or deletions.
|
|
44 From this the high potential profit for bioinformatics: dealing with
|
|
45 sequences in a way that doesn't have to rely on positions, without
|
|
46 the need of constantly updating them if the sequence changes, even
|
|
47 dramatically.
|
|
48
|
|
49 =head1 AUTHOR - Joseph A.L. Insana
|
|
50
|
|
51 Email: Insana@ebi.ac.uk, jinsana@gmx.net
|
|
52
|
|
53 Address:
|
|
54
|
|
55 EMBL Outstation, European Bioinformatics Institute
|
|
56 Wellcome Trust Genome Campus, Hinxton
|
|
57 Cambs. CB10 1SD, United Kingdom
|
|
58
|
|
59 =head1 APPENDIX
|
|
60
|
|
61 The rest of the documentation details each of the object
|
|
62 methods. Internal methods are usually preceded with a _
|
|
63
|
|
64 =cut
|
|
65
|
|
66 # Let the code begin...
|
|
67
|
|
68 # DoubleChain Data Structure for PERL
|
|
69 # by Joseph A.L. Insana - Deathson - Filius Mortis - Fal Mortais
|
|
70 # insana@ebi.ac.uk, jinsana@gmx.net
|
|
71
|
|
72 package Bio::LiveSeq::Chain;
|
|
73 # Version history:
|
|
74 # Fri Mar 10 16:46:51 GMT 2000 v1.0 begun working on chains in perl
|
|
75 # Sat Mar 11 05:47:21 GMT 2000 v.1.4 working on splice method
|
|
76 # Sun Mar 12 14:08:31 GMT 2000 v.1.5
|
|
77 # Sun Mar 12 17:21:51 GMT 2000 v.2.0 splice method working, is_updownstream made
|
|
78 # Sun Mar 12 18:11:22 GMT 2000 v.2.04 wrapped all in package Chain.pm
|
|
79 # Sun Mar 12 18:49:23 GMT 2000 v.2.08 added elements()
|
|
80 # Sun Mar 12 21:18:04 GMT 2000 v.2.1 done array2dchain, working on *insert*
|
|
81 # Sun Mar 12 23:04:40 GMT 2000 v.2.16 done *insert*, up_element, create_elems
|
|
82 # Sun Mar 12 23:45:32 GMT 2000 v.2.17 debugged and checked
|
|
83 # Mon Mar 13 00:44:51 GMT 2000 v.2.2 added mutate()
|
|
84 # Mon Mar 13 02:00:32 GMT 2000 v 2.21 added invert_dchain()
|
|
85 # Mon Mar 13 03:01:21 GMT 2000 v 2.22 created updown_chain2string
|
|
86 # Mon Mar 13 03:45:50 GMT 2000 v.2.24 added subchain_length()
|
|
87 # Mon Mar 13 17:25:04 GMT 2000 v.2.26 added element_at_pos and pos_of_element
|
|
88 # Wed Mar 15 23:05:06 GMT 2000 v.2.27 use strict enforced
|
|
89 # Thu Mar 16 19:05:34 GMT 2000 v.2.3 changed dchain->chain everywhere
|
|
90 # Fri Mar 17 01:48:36 GMT 2000 v.2.33 mutate_element renamed, created new
|
|
91 # methods: set_value, get_value...
|
|
92 # Fri Mar 17 05:03:15 GMT 2000 v.2.4 set_value_at_pos, get_value_at_pos
|
|
93 # get_label_at_pos...
|
|
94 # Fri Mar 17 15:51:07 GMT 2000 v.2.41 renamed pos_of_element -> get_pos_of_label
|
|
95 # Fri Mar 17 18:10:36 GMT 2000 v.2.44 recoded subchain_length and pos_of_label
|
|
96 # Fri Mar 17 20:12:27 GMT 2000 v.2.5 NAMING change: index->label everywhere
|
|
97 # Mon Mar 20 18:33:10 GMT 2000 v.2.52 label_exists(), start(), end()
|
|
98 # Mon Mar 20 23:10:28 GMT 2000 v.2.6 labels() created
|
|
99 # Wed Mar 22 18:35:17 GMT 2000 v.2.61 chain2string() rewritten
|
|
100 # Tue Dec 12 14:47:58 GMT 2000 v 2.66 optimized with /use integer/
|
|
101 # Tue Dec 12 16:28:45 GMT 2000 v 2.7 rewritten comments to methods in pod style
|
|
102
|
|
103 #
|
|
104 $VERSION=2.7;
|
|
105 #
|
|
106 # TODO_list:
|
|
107 # **** cleanup code
|
|
108 # **** performance concerns
|
|
109 # *??* create hash2dchain ???? (with hashkeys used for label)
|
|
110 # **????** how about using array of arrays instead than hash of arrays??
|
|
111 #
|
|
112 # further strict complaints:
|
|
113 # in verbose $string assignment around line 721 ???
|
|
114
|
|
115 # TERMINOLOGY update, naming convention:
|
|
116 # "chain" the datastructure
|
|
117 # "element" the individual units that compose a chain
|
|
118 # "label" the unique name of a single element
|
|
119 # "position" the position of an element into the chain according to a
|
|
120 # particular coordinate system (e.g. counting from the start)
|
|
121 # "value" what is stored in a single element
|
|
122
|
|
123 use Carp qw(croak cluck carp); # as of 2.3
|
|
124 use strict; # as of 2.27
|
|
125 use integer; # WARNING: this is to increase performance
|
|
126 # a little bit of attention has to be given if float need to
|
|
127 # be stored as elements of the array
|
|
128 # the use of this "integer" affects all operations but not
|
|
129 # assignments. So float CAN be assigned as elements of the chain
|
|
130 # BUT, if you assign $z=-1.8;, $z will be equal to -1 because
|
|
131 # "-" counts as a unary operation!
|
|
132
|
|
133 =head2 _updown_chain2string
|
|
134
|
|
135 Title : chain2string
|
|
136 Usage : $string = Bio::LiveSeq::Chain::chain2string("down",$chain,6,9)
|
|
137 Function: reads the contents of the chain, outputting a string
|
|
138 Returns : a string
|
|
139 Examples:
|
|
140 : down_chain2string($chain) -> all the chain from begin to end
|
|
141 : down_chain2string($chain,6) -> from 6 to the end
|
|
142 : down_chain2string($chain,6,4) -> from 6, going on 4 elements
|
|
143 : down_chain2string($chain,6,"",10) -> from 6 to 10
|
|
144 : up_chain2string($chain,10,"",6) -> from 10 to 6 upstream
|
|
145 Defaults: start=first element; if len undef, goes to last
|
|
146 if last undef, goes to end
|
|
147 if last defined, it overrides len (undefining it)
|
|
148 Error code: -1
|
|
149 Args : "up"||"down" as first argument to specify the reading direction
|
|
150 reference (to the chain)
|
|
151 [first] [len] [last] optional integer arguments to specify how
|
|
152 much and from (and to) where to read
|
|
153
|
|
154 =cut
|
|
155
|
|
156 # methods rewritten 2.61
|
|
157 sub up_chain2string {
|
|
158 _updown_chain2string("up",@_);
|
|
159 }
|
|
160 sub down_chain2string {
|
|
161 _updown_chain2string("down",@_);
|
|
162 }
|
|
163
|
|
164 sub _updown_chain2string {
|
|
165 my ($direction,$chain,$first,$len,$last)=@_;
|
|
166 unless($chain) { cluck "no chain input"; return (-1); }
|
|
167 my $begin=$chain->{'begin'}; # the label of the BEGIN element
|
|
168 my $end=$chain->{'end'}; # the label of the END element
|
|
169 my $flow;
|
|
170
|
|
171 if ($direction eq "up") {
|
|
172 $flow=2; # used to determine the direction of chain navigation
|
|
173 unless ($first) { $first=$end; } # if undef or 0, use $end
|
|
174 } else { # defaults to "down"
|
|
175 $flow=1; # used to determine the direction of chain navigation
|
|
176 unless ($first) { $first=$begin; } # if undef or 0, use $begin
|
|
177 }
|
|
178
|
|
179 unless($chain->{$first}) {
|
|
180 cluck "label for first not defined"; return (-1); }
|
|
181 if ($last) { # if last is defined, it gets priority and len is not used
|
|
182 unless($chain->{$last}) {
|
|
183 cluck "label for last not defined"; return (-1); }
|
|
184 if ($len) {
|
|
185 warn "Warning chain2string: argument LAST:$last overriding LEN:$len!";
|
|
186 undef $len;
|
|
187 }
|
|
188 } else {
|
|
189 if ($direction eq "up") {
|
|
190 $last=$begin; # if last not defined, go 'till begin (or upto len elements)
|
|
191 } else {
|
|
192 $last=$end; # if last not defined, go 'till end (or upto len elements)
|
|
193 }
|
|
194 }
|
|
195
|
|
196 my ($string,@array);
|
|
197 my $label=$first; my $i=1;
|
|
198 my $afterlast=$chain->{$last}[$flow]; # if last=end, afterlast is undef
|
|
199 unless (defined $afterlast) { $afterlast=0; } # keep strict happy
|
|
200
|
|
201 # proceed for len elements or until last, whichever comes first
|
|
202 # if $len undef goes till end
|
|
203 while (($label) && ($label != $afterlast) && ($i <= ($len || $i + 1))) {
|
|
204 @array=@{$chain->{$label}};
|
|
205 $string .= $array[0];
|
|
206 $label = $array[$flow];
|
|
207 $i++;
|
|
208 }
|
|
209 return ($string); # if chain is interrupted $string won't be complete
|
|
210 }
|
|
211
|
|
212 =head2 _updown_labels
|
|
213
|
|
214 Title : labels
|
|
215 Usage : @labels = Bio::LiveSeq::Chain::_updown_labels("down",$chain,4,16)
|
|
216 Function: returns all the labels in a chain or those between two
|
|
217 specified ones (termed "first" and "last")
|
|
218 Returns : a reference to an array containing the labels
|
|
219 Args : "up"||"down" as first argument to specify the reading direction
|
|
220 reference (to the chain)
|
|
221 [first] [last] (integer for the starting and eneding labels)
|
|
222
|
|
223 =cut
|
|
224
|
|
225
|
|
226 # arguments: CHAIN_REF [FIRSTLABEL] [LASTLABEL]
|
|
227 # returns: reference to array containing the labels
|
|
228 sub down_labels {
|
|
229 my ($chain,$first,$last)=@_;
|
|
230 _updown_labels("down",$chain,$first,$last);
|
|
231 }
|
|
232 sub up_labels {
|
|
233 my ($chain,$first,$last)=@_;
|
|
234 _updown_labels("up",$chain,$first,$last);
|
|
235 }
|
|
236 # arguments: "up"||"down" CHAIN_REF [FIRSTLABEL] [LASTLABEL]
|
|
237 # returns: reference to array containing the labels
|
|
238 sub _updown_labels {
|
|
239 my ($direction,$chain,$first,$last)=@_;
|
|
240 unless($chain) { cluck "no chain input"; return (0); }
|
|
241 my $begin=$chain->{'begin'}; # the label of the BEGIN element
|
|
242 my $end=$chain->{'end'}; # the label of the END element
|
|
243 my $flow;
|
|
244 if ($direction eq "up") { $flow=2;
|
|
245 unless ($first) { $first=$end; }
|
|
246 unless ($last) { $last=$begin; }
|
|
247 } else { $flow=1;
|
|
248 unless ($last) { $last=$end; }
|
|
249 unless ($first) { $first=$begin; }
|
|
250 }
|
|
251 unless($chain->{$first}) { warn "not existing label $first"; return (0); }
|
|
252 unless($chain->{$last}) { warn "not existing label $last"; return (0); }
|
|
253
|
|
254 my $label=$first; my @labels;
|
|
255 my $afterlast=$chain->{$last}[$flow]; # if last=end, afterlast is undef
|
|
256 unless (defined $afterlast) { $afterlast=0; } # keep strict happy
|
|
257
|
|
258 while (($label)&&($label != $afterlast)) {
|
|
259 push(@labels,$label);
|
|
260 $label=$chain->{$label}[$flow];
|
|
261 }
|
|
262 return (\@labels); # if chain is interrupted @labels won't be complete
|
|
263 }
|
|
264
|
|
265
|
|
266 =head2 start
|
|
267
|
|
268 Title : start
|
|
269 Usage : $start = Bio::LiveSeq::Chain::start()
|
|
270 Returns : the label marking the start of the chain
|
|
271 Errorcode: -1
|
|
272 Args : none
|
|
273
|
|
274 =cut
|
|
275
|
|
276 sub start {
|
|
277 my $chain=$_[0];
|
|
278 unless($chain) { cluck "no chain input"; return (-1); }
|
|
279 return ($chain->{'begin'});
|
|
280 }
|
|
281
|
|
282 =head2 end
|
|
283
|
|
284 Title : end
|
|
285 Usage : $end = Bio::LiveSeq::Chain::end()
|
|
286 Returns : the label marking the end of the chain
|
|
287 Errorcode: -1
|
|
288 Args : none
|
|
289
|
|
290 =cut
|
|
291
|
|
292 sub end {
|
|
293 my $chain=$_[0];
|
|
294 unless($chain) { cluck "no chain input"; return (-1); }
|
|
295 return ($chain->{'end'});
|
|
296 }
|
|
297
|
|
298 =head2 label_exists
|
|
299
|
|
300 Title : label_exists
|
|
301 Usage : $check = Bio::LiveSeq::Chain::label_exists($chain,$label)
|
|
302 Function: It checks if a label is defined, i.e. if an element is there or
|
|
303 is not there anymore
|
|
304 Returns : 1 if the label exists, 0 if it is not there, -1 error
|
|
305 Errorcode: -1
|
|
306 Args : reference to the chain, integer
|
|
307
|
|
308 =cut
|
|
309
|
|
310 sub label_exists {
|
|
311 my ($chain,$label)=@_;
|
|
312 unless($chain) { cluck "no chain input"; return (-1); }
|
|
313 if ($label && $chain->{$label}) { return (1); } else { return (0) };
|
|
314 }
|
|
315
|
|
316
|
|
317 =head2 down_get_pos_of_label
|
|
318
|
|
319 Title : down_get_pos_of_label
|
|
320 Usage : $position = Bio::LiveSeq::Chain::down_get_pos_of_label($chain,$label,$first)
|
|
321 Function: returns the position of $label counting from $first, i.e. taking
|
|
322 $first as 1 of coordinate system. If $first is not specified it will
|
|
323 count from the start of the chain.
|
|
324 Returns :
|
|
325 Errorcode: 0
|
|
326 Args : reference to the chain, integer (the label of interest)
|
|
327 optional: integer (a different label that will be taken as the
|
|
328 first one, i.e. the one to count from)
|
|
329 Note: It counts "downstream". To proceed backward use up_get_pos_of_label
|
|
330
|
|
331 =cut
|
|
332
|
|
333 sub down_get_pos_of_label {
|
|
334 #down_chain2string($_[0],$_[2],undef,$_[1],"counting");
|
|
335 my ($chain,$label,$first)=@_;
|
|
336 _updown_count("down",$chain,$first,$label);
|
|
337 }
|
|
338 sub up_get_pos_of_label {
|
|
339 #up_chain2string($_[0],$_[2],undef,$_[1],"counting");
|
|
340 my ($chain,$label,$first)=@_;
|
|
341 _updown_count("up",$chain,$first,$label);
|
|
342 }
|
|
343
|
|
344 =head2 down_subchain_length
|
|
345
|
|
346 Title : down_subchain_length
|
|
347 Usage : $length = Bio::LiveSeq::Chain::down_subchain_length($chain,$first,$last)
|
|
348 Function: returns the length of the chain between the labels "first" and "last", included
|
|
349 Returns : integer
|
|
350 Errorcode: 0
|
|
351 Args : reference to the chain, integer, integer
|
|
352 Note: It counts "downstream". To proceed backward use up_subchain_length
|
|
353
|
|
354 =cut
|
|
355
|
|
356 # arguments: chain_ref [first] [last]
|
|
357 # returns the length of the chain between first and last (included)
|
|
358 sub down_subchain_length {
|
|
359 #down_chain2string($_[0],$_[1],undef,$_[2],"counting");
|
|
360 my ($chain,$first,$last)=@_;
|
|
361 _updown_count("down",$chain,$first,$last);
|
|
362 }
|
|
363 sub up_subchain_length {
|
|
364 #up_chain2string($_[0],$_[1],undef,$_[2],"counting");
|
|
365 my ($chain,$first,$last)=@_;
|
|
366 _updown_count("up",$chain,$first,$last);
|
|
367 }
|
|
368
|
|
369 # arguments: DIRECTION CHAIN_REF FIRSTLABEL LASTLABEL
|
|
370 # errorcode 0
|
|
371 sub _updown_count {
|
|
372 my ($direction,$chain,$first,$last)=@_;
|
|
373 unless($chain) { cluck "no chain input"; return (0); }
|
|
374 my $begin=$chain->{'begin'}; # the label of the BEGIN element
|
|
375 my $end=$chain->{'end'}; # the label of the END element
|
|
376 my $flow;
|
|
377 if ($direction eq "up") { $flow=2;
|
|
378 unless ($first) { $first=$end; }
|
|
379 unless ($last) { $last=$begin; }
|
|
380 } else { $flow=1;
|
|
381 unless ($last) { $last=$end; }
|
|
382 unless ($first) { $first=$begin; }
|
|
383 }
|
|
384 unless($chain->{$first}) { warn "not existing label $first"; return (0); }
|
|
385 unless($chain->{$last}) { warn "not existing label $last"; return (0); }
|
|
386
|
|
387 my $label=$first; my $count;
|
|
388 my $afterlast=$chain->{$last}[$flow]; # if last=end, afterlast is undef
|
|
389 unless (defined $afterlast) { $afterlast=0; } # keep strict happy
|
|
390
|
|
391 while (($label)&&($label != $afterlast)) {
|
|
392 $count++;
|
|
393 $label=$chain->{$label}[$flow];
|
|
394 }
|
|
395 return ($count); # if chain is interrupted, $i will be up to the breaking point
|
|
396 }
|
|
397
|
|
398 =head2 invert_chain
|
|
399
|
|
400 Title : invert_chain
|
|
401 Usage : $errorcode=Bio::LiveSeq::Chain::invert_chain($chain)
|
|
402 Function: completely inverts the order of the chain elements; begin is swapped with end and all links updated (PREV&NEXT fields swapped)
|
|
403 Returns : 1 if all OK, 0 if errors
|
|
404 Errorcode: 0
|
|
405 Args : reference to the chain
|
|
406
|
|
407 =cut
|
|
408
|
|
409 sub invert_chain {
|
|
410 my $chain=$_[0];
|
|
411 unless($chain) { cluck "no chain input"; return (0); }
|
|
412 my $begin=$chain->{'begin'}; # the name of the first element
|
|
413 my $end=$chain->{'end'}; # the name of the last element
|
|
414 my ($label,@array);
|
|
415 $label=$begin; # starts from the beginning
|
|
416 while ($label) { # proceed with linked elements, swapping PREV and NEXT
|
|
417 @array=@{$chain->{$label}};
|
|
418 ($chain->{$label}[1],$chain->{$label}[2])=($array[2],$array[1]); # swap
|
|
419 $label = $array[1]; # go to the next one
|
|
420 }
|
|
421 # now swap begin and end fields
|
|
422 ($chain->{'begin'},$chain->{'end'})=($end,$begin);
|
|
423 return (1); # that's it
|
|
424 }
|
|
425
|
|
426 # warning that method has changed name
|
|
427 #sub mutate_element {
|
|
428 #croak "Warning: old method name. Please update code to 'set_value_at_label'\n";
|
|
429 # &set_value_at_label;
|
|
430 #}
|
|
431
|
|
432 =head2 down_get_value_at_pos
|
|
433
|
|
434 Title : down_get_value_at_pos
|
|
435 Usage : $value = Bio::LiveSeq::Chain::down_get_value_at_pos($chain,$position,$first)
|
|
436 Function: used to access the value of the chain at a particular position instead than directly with a label pointer. It will count the position from the start of the chain or from the label $first, if $first is specified
|
|
437 Returns : whatever is stored in the element of the chain
|
|
438 Errorcode: 0
|
|
439 Args : reference to the chain, integer, [integer]
|
|
440 Note: It works "downstream". To proceed backward use up_get_value_at_pos
|
|
441
|
|
442 =cut
|
|
443
|
|
444 #sub get_value_at_pos {
|
|
445 #croak "Please use instead: down_get_value_at_pos";
|
|
446 ##&down_get_value_at_pos;
|
|
447 #}
|
|
448 sub down_get_value_at_pos {
|
|
449 my ($chain,$position,$first)=@_;
|
|
450 my $label=down_get_label_at_pos($chain,$position,$first);
|
|
451 # check place of change
|
|
452 if (($label eq -1)||($label eq 0)) { # complain if label doesn't exist
|
|
453 warn "not existing element $label"; return (0); }
|
|
454 return _get_value($chain,$label);
|
|
455 }
|
|
456 sub up_get_value_at_pos {
|
|
457 my ($chain,$position,$first)=@_;
|
|
458 my $label=up_get_label_at_pos($chain,$position,$first);
|
|
459 # check place of change
|
|
460 if (($label eq -1)||($label eq 0)) { # complain if label doesn't exist
|
|
461 warn "not existing element $label"; return (0); }
|
|
462 return _get_value($chain,$label);
|
|
463 }
|
|
464
|
|
465 =head2 down_set_value_at_pos
|
|
466
|
|
467 Title : down_set_value_at_pos
|
|
468 Usage : $errorcode = Bio::LiveSeq::Chain::down_set_value_at_pos($chain,$newvalue,$position,$first)
|
|
469 Function: used to store a new value inside an element of the chain at a particular position instead than directly with a label pointer. It will count the position from the start of the chain or from the label $first, if $first is specified
|
|
470 Returns : 1
|
|
471 Errorcode: 0
|
|
472 Args : reference to the chain, newvalue, integer, [integer]
|
|
473 (newvalue can be: integer, string, object reference, hash ref)
|
|
474 Note: It works "downstream". To proceed backward use up_set_value_at_pos
|
|
475 Note2: If the $newvalue is undef, it will delete the contents of the
|
|
476 element but it won't remove the element from the chain.
|
|
477
|
|
478 =cut
|
|
479
|
|
480 #sub set_value_at_pos {
|
|
481 #croak "Please use instead: down_set_value_at_pos";
|
|
482 ##&down_set_value_at_pos;
|
|
483 #}
|
|
484 sub down_set_value_at_pos {
|
|
485 my ($chain,$value,$position,$first)=@_;
|
|
486 my $label=down_get_label_at_pos($chain,$position,$first);
|
|
487 # check place of change
|
|
488 if (($label eq -1)||($label eq 0)) { # complain if label doesn't exist
|
|
489 warn "not existing element $label"; return (0); }
|
|
490 _set_value($chain,$label,$value);
|
|
491 return (1);
|
|
492 }
|
|
493 sub up_set_value_at_pos {
|
|
494 my ($chain,$value,$position,$first)=@_;
|
|
495 my $label=up_get_label_at_pos($chain,$position,$first);
|
|
496 # check place of change
|
|
497 if (($label eq -1)||($label eq 0)) { # complain if label doesn't exist
|
|
498 warn "not existing element $label"; return (0); }
|
|
499 _set_value($chain,$label,$value);
|
|
500 return (1);
|
|
501 }
|
|
502
|
|
503
|
|
504 =head2 down_set_value_at_label
|
|
505
|
|
506 Title : down_set_value_at_label
|
|
507 Usage : $errorcode = Bio::LiveSeq::Chain::down_set_value_at_label($chain,$newvalue,$label)
|
|
508 Function: used to store a new value inside an element of the chain defined by its label.
|
|
509 Returns : 1
|
|
510 Errorcode: 0
|
|
511 Args : reference to the chain, newvalue, integer
|
|
512 (newvalue can be: integer, string, object reference, hash ref)
|
|
513 Note: It works "downstream". To proceed backward use up_set_value_at_label
|
|
514 Note2: If the $newvalue is undef, it will delete the contents of the
|
|
515 element but it won't remove the element from the chain.
|
|
516
|
|
517 =cut
|
|
518
|
|
519 sub set_value_at_label {
|
|
520 my ($chain,$value,$label)=@_;
|
|
521 unless($chain) { cluck "no chain input"; return (0); }
|
|
522
|
|
523 # check place of change
|
|
524 unless($chain->{$label}) { # complain if label doesn't exist
|
|
525 warn "not existing element $label"; return (0); }
|
|
526 _set_value($chain,$label,$value);
|
|
527 return (1);
|
|
528 }
|
|
529
|
|
530 =head2 down_get_value_at_label
|
|
531
|
|
532 Title : down_get_value_at_label
|
|
533 Usage : $value = Bio::LiveSeq::Chain::down_get_value_at_label($chain,$label)
|
|
534 Function: used to access the value of the chain from one element defined by its label.
|
|
535 Returns : whatever is stored in the element of the chain
|
|
536 Errorcode: 0
|
|
537 Args : reference to the chain, integer
|
|
538 Note: It works "downstream". To proceed backward use up_get_value_at_label
|
|
539
|
|
540 =cut
|
|
541
|
|
542 sub get_value_at_label {
|
|
543 my $chain=$_[0];
|
|
544 unless($chain) { cluck "no chain input"; return (0); }
|
|
545 my $label = $_[1]; # the name of the element
|
|
546
|
|
547 # check place of change
|
|
548 unless($chain->{$label}) { # complain if label doesn't exist
|
|
549 warn "not existing label $label"; return (0); }
|
|
550 return _get_value($chain,$label);
|
|
551 }
|
|
552
|
|
553 # arguments: CHAIN_REF LABEL VALUE
|
|
554 sub _set_value {
|
|
555 my ($chain,$label,$value)=@_;
|
|
556 $chain->{$label}[0]=$value;
|
|
557 }
|
|
558 # arguments: CHAIN_REF LABEL
|
|
559 sub _get_value {
|
|
560 my ($chain,$label)=@_;
|
|
561 return $chain->{$label}[0];
|
|
562 }
|
|
563
|
|
564 =head2 down_get_label_at_pos
|
|
565
|
|
566 Title : down_get_label_at_pos
|
|
567 Usage : $label = Bio::LiveSeq::Chain::down_get_label_at_pos($chain,$position,$first)
|
|
568 Function: used to retrieve the label of an an element of the chain at a particular position. It will count the position from the start of the chain or from the label $first, if $first is specified
|
|
569 Returns : integer
|
|
570 Errorcode: 0
|
|
571 Args : reference to the chain, integer, [integer]
|
|
572 Note: It works "downstream". To proceed backward use up_get_label_at_pos
|
|
573
|
|
574 =cut
|
|
575
|
|
576 # arguments: CHAIN_REF POSITION [FIRST]
|
|
577 # returns: LABEL of element found counting from FIRST
|
|
578 sub down_get_label_at_pos {
|
|
579 _updown_get_label_at_pos("down",@_);
|
|
580 }
|
|
581 sub up_get_label_at_pos {
|
|
582 _updown_get_label_at_pos("up",@_);
|
|
583 }
|
|
584
|
|
585 # arguments: [DIRECTION] CHAIN_REF POSITION [FIRST]
|
|
586 # Default DIRECTION="down"
|
|
587 # if FIRST is undefined, FIRST=START (if DIRECTION=down) or FIRST=END (up)
|
|
588
|
|
589 sub _updown_get_label_at_pos {
|
|
590 my ($direction,$chain,$position,$first)=@_;
|
|
591 unless($chain) { cluck "no chain input"; return (0); }
|
|
592 my $begin=$chain->{'begin'}; # the label of the BEGIN element
|
|
593 my $end=$chain->{'end'}; # the label of the END element
|
|
594 my $flow;
|
|
595 if ($direction eq "up") { $flow=2; unless ($first) { $first=$end; }
|
|
596 } else { $flow=1; unless ($first) { $first=$begin; } }
|
|
597 unless($chain->{$first}) { warn "not existing label $first"; return (0); }
|
|
598
|
|
599 my $label=$first;
|
|
600 my $i=1;
|
|
601 while ($i < $position) {
|
|
602 $label=$chain->{$label}[$flow];
|
|
603 $i++;
|
|
604 unless ($label) { return (0); } # chain ended before position reached
|
|
605 }
|
|
606 return ($label);
|
|
607 }
|
|
608
|
|
609 # for english_concerned, latin_unconcerned people
|
|
610 sub preinsert_string { &praeinsert_string }
|
|
611 sub preinsert_array { &praeinsert_array }
|
|
612
|
|
613 # praeinsert_string CHAIN_REF STRING [POSITION]
|
|
614 # the chars of STRING are passed to praeinsert_array
|
|
615 # the chars are inserted in CHAIN, before POSITION
|
|
616 # if POSITION is undef, default is to prepend the string to the beginning
|
|
617 # i.e. POSITION is START of CHAIN
|
|
618 sub praeinsert_string {
|
|
619 my @string=split(//,$_[1]);
|
|
620 praeinsert_array($_[0],\@string,$_[2]);
|
|
621 }
|
|
622
|
|
623 # postinsert_string CHAIN_REF STRING [POSITION]
|
|
624 # the chars of STRING are passed to postinsert_array
|
|
625 # the chars are inserted in CHAIN, after POSITION
|
|
626 # if POSITION is undef, default is to append the string to the end
|
|
627 # i.e. POSITION is END of CHAIN
|
|
628 sub postinsert_string {
|
|
629 my @string=split(//,$_[1]);
|
|
630 postinsert_array($_[0],\@string,$_[2]);
|
|
631 }
|
|
632
|
|
633 # praeinsert_array CHAIN_REF ARRAY_REF [POSITION]
|
|
634 # the elements of ARRAY are inserted in CHAIN, before POSITION
|
|
635 # if POSITION is undef, default is to prepend the elements to the beginning
|
|
636 # i.e. POSITION is START of CHAIN
|
|
637 sub praeinsert_array {
|
|
638 _praepostinsert_array($_[0],"prae",$_[1],$_[2]);
|
|
639 }
|
|
640
|
|
641 # postinsert_array CHAIN_REF ARRAY_REF [POSITION]
|
|
642 # the elements of ARRAY are inserted in CHAIN, after POSITION
|
|
643 # if POSITION is undef, default is to append the elements to the end
|
|
644 # i.e. POSITION is END of CHAIN
|
|
645 sub postinsert_array {
|
|
646 _praepostinsert_array($_[0],"post",$_[1],$_[2]);
|
|
647 }
|
|
648
|
|
649
|
|
650 =head2 _praepostinsert_array
|
|
651
|
|
652 Title : _praepostinsert_array
|
|
653 Usage : ($insbegin,$insend) = Bio::LiveSeq::Chain::_praepostinsert_array($chainref,"post",$arrayref,$position)
|
|
654 Function: the elements of the array specified by $arrayref are inserted (creating a new subchain) in the chain specified by $chainref, before or after (depending on the "prae"||"post" keyword passed as second argument) the specified position.
|
|
655 Returns : two labels: the first and the last of the inserted subchain
|
|
656 Defaults: if no position is specified, the new chain will be inserted after
|
|
657 (post) the first element of the chain
|
|
658 Errorcode: 0
|
|
659 Args : chainref, "prae"||"post", arrayref, integer (position)
|
|
660
|
|
661 =cut
|
|
662
|
|
663 # returns: 0 if errors, otherwise returns references of begin and end of
|
|
664 # the insertion
|
|
665 sub _praepostinsert_array {
|
|
666 my $chain=$_[0];
|
|
667 unless($chain) { cluck "no chain input"; return (0); }
|
|
668 my $praepost=$_[1] || "post"; # defaults to post
|
|
669 my ($prae,$post);
|
|
670 my $position=$_[3];
|
|
671 my $begin=$chain->{'begin'}; # the name of the first element of the chain
|
|
672 my $end=$chain->{'end'}; # the name of the the last element of the chain
|
|
673 # check if prae or post insertion and prepare accordingly
|
|
674 if ($praepost eq "prae") {
|
|
675 $prae=1;
|
|
676 unless (($position eq 0)||($position)) { $position=$begin; } # if undef, use $begin
|
|
677 } else {
|
|
678 $post=1;
|
|
679 unless (($position eq 0)||($position)) { $position=$end; } # if undef, use $end
|
|
680 }
|
|
681 # check place of insertion
|
|
682 unless($chain->{$position}) { # complain if position doesn't exist
|
|
683 warn ("Warning _praepostinsert_array: not existing element $position");
|
|
684 return (0);
|
|
685 }
|
|
686
|
|
687 # check if there are elements to insert
|
|
688 my $elements=$_[2]; # reference to the array containing the new elements
|
|
689 my $elements_count=scalar(@{$elements});
|
|
690 unless ($elements_count) {
|
|
691 warn ("Warning _praepostinsert_array: no elements input"); return (0); }
|
|
692
|
|
693 # create new chainelements with offset=firstfree(chain)
|
|
694 my ($insertbegin,$insertend)=_create_chain_elements($chain,$elements);
|
|
695
|
|
696 # DEBUGGING
|
|
697 #print "Executing ${praepost}insertion of $elements_count elements ('@{$elements}') at position: $position\n";
|
|
698
|
|
699 # attach the new chain to the old chain
|
|
700 # 4 cases: prae@begin, prae@middle, post@middle, post@end
|
|
701 # NOTE: in case of double joinings always join wisely so not to
|
|
702 # delete the PREV/NEXT attribute before it is needed
|
|
703 my $noerror=1;
|
|
704 if ($prae) {
|
|
705 if ($position==$begin) { # 1st case: prae@begin
|
|
706 $noerror=_join_chain_elements($chain,$insertend,$begin);
|
|
707 $chain->{'begin'}=$insertbegin;
|
|
708 } else { # 2nd case: prae@middle
|
|
709 $noerror=_join_chain_elements($chain,up_element($chain,$position),$insertbegin);
|
|
710 $noerror=_join_chain_elements($chain,$insertend,$position);
|
|
711 }
|
|
712 } elsif ($post) {
|
|
713 if ($position==$end) { # 4th case: post@end
|
|
714 $noerror=_join_chain_elements($chain,$end,$insertbegin);
|
|
715 $chain->{'end'}=$insertend;
|
|
716 } else { # 3rd case: post@middle # note the order of joins (important)
|
|
717 $noerror=_join_chain_elements($chain,$insertend,down_element($chain,$position));
|
|
718 $noerror=_join_chain_elements($chain,$position,$insertbegin);
|
|
719 }
|
|
720 } else { # this should never happen
|
|
721 die "_praepostinsert_array: Something went very wrong";
|
|
722 }
|
|
723
|
|
724 # check for errors and return begin,end of insertion
|
|
725 if ($noerror) {
|
|
726 return ($insertbegin,$insertend);
|
|
727 } else { # something went wrong with the joinings
|
|
728 warn "Warning _praepostinsert_array: Joining of insertion failed";
|
|
729 return (0);
|
|
730 }
|
|
731 }
|
|
732
|
|
733 # create new chain elements with offset=firstfree
|
|
734 # arguments: CHAIN_REF ARRAY_REF
|
|
735 # returns: pointers to BEGIN and END of new chained elements created
|
|
736 # returns 0 if error(s) encountered
|
|
737 sub _create_chain_elements {
|
|
738 my $chain=$_[0];
|
|
739 unless($chain) {
|
|
740 warn ("Warning _create_chain_elements: no chain input"); return (0); }
|
|
741 my $arrayref=$_[1];
|
|
742 my $array_count=scalar(@{$arrayref});
|
|
743 unless ($array_count) {
|
|
744 warn ("Warning _create_chain_elements: no elements input"); return (0); }
|
|
745 my $begin=$chain->{'firstfree'};
|
|
746 my $i=$begin-1;
|
|
747 my $element;
|
|
748 foreach $element (@{$arrayref}) {
|
|
749 $i++;
|
|
750 $chain->{$i}=[$element,$i+1,$i-1];
|
|
751 }
|
|
752 my $end=$i;
|
|
753 $chain->{'firstfree'}=$i+1; # what a new added element should be called
|
|
754 $chain->{'size'} += $end-$begin+1; # increase size of chain
|
|
755 # leave sticky edges (to be joined by whoever called this subroutine)
|
|
756 $chain->{$begin}[2]=undef;
|
|
757 $chain->{$end}[1]=undef;
|
|
758 return ($begin,$end); # return pointers to first and last of the newelements
|
|
759 }
|
|
760
|
|
761 # argument: CHAIN_REF ELEMENT
|
|
762 # returns: name of DOWN/NEXT element (the downstream one)
|
|
763 # returns -1 if error encountered (e.g. chain or elements undefined)
|
|
764 # returns 0 if there's no DOWN element
|
|
765 sub down_element {
|
|
766 _updown_element("down",@_);
|
|
767 }
|
|
768 # argument: CHAIN_REF ELEMENT
|
|
769 # returns: name of UP/PREV element (the upstream one)
|
|
770 # returns -1 if error encountered (e.g. chain or elements undefined)
|
|
771 # returns 0 if there's no UP element
|
|
772 sub up_element {
|
|
773 _updown_element("up",@_);
|
|
774 }
|
|
775
|
|
776 # used by both is_up_element and down_element
|
|
777 sub _updown_element {
|
|
778 my $direction=$_[0] || "down"; # defaults to downstream
|
|
779 my $flow;
|
|
780 if ($direction eq "up") {
|
|
781 $flow=2; # used to determine the direction of chain navigation
|
|
782 } else {
|
|
783 $flow=1; # used to determine the direction of chain navigation
|
|
784 }
|
|
785 my $chain=$_[1];
|
|
786 unless($chain) {
|
|
787 warn ("Warning ${direction}_element: no chain input"); return (-1); }
|
|
788 my $me = $_[2]; # the name of the element
|
|
789 my $it = $chain->{$me}[$flow]; # the prev||next one, upstream||downstream
|
|
790 if ($it) {
|
|
791 return ($it); # return the name of prev||next element
|
|
792 } else {
|
|
793 return (0); # there is no prev||next element ($it is undef)
|
|
794 }
|
|
795 }
|
|
796
|
|
797 # used by both is_downstream and is_upstream
|
|
798 sub _is_updownstream {
|
|
799 my $direction=$_[0] || "down"; # defaults to downstream
|
|
800 my $flow;
|
|
801 if ($direction eq "up") {
|
|
802 $flow=2; # used to determine the direction of chain navigation
|
|
803 } else {
|
|
804 $flow=1; # used to determine the direction of chain navigation
|
|
805 }
|
|
806 my $chain=$_[1];
|
|
807 unless($chain) {
|
|
808 warn ("Warning is_${direction}stream: no chain input"); return (-1); }
|
|
809 my $first=$_[2]; # the name of the first element
|
|
810 my $second=$_[3]; # the name of the first element
|
|
811 if ($first==$second) {
|
|
812 warn ("Warning is_${direction}stream: first==second!!"); return (0); }
|
|
813 unless($chain->{$first}) {
|
|
814 warn ("Warning is_${direction}stream: first element not defined"); return (-1); }
|
|
815 unless($chain->{$second}) {
|
|
816 warn ("Warning is_${direction}stream: second element not defined"); return (-1); }
|
|
817 my ($label,@array);
|
|
818 $label=$first;
|
|
819 my $found=0;
|
|
820 while (($label)&&(!($found))) { # searches till the end or till found
|
|
821 if ($label==$second) {
|
|
822 $found=1;
|
|
823 }
|
|
824 @array=@{$chain->{$label}};
|
|
825 $label = $array[$flow]; # go to the prev||next one, upstream||downstream
|
|
826 }
|
|
827 return $found;
|
|
828 }
|
|
829
|
|
830 =head2 is_downstream
|
|
831
|
|
832 Title : is_downstream
|
|
833 Usage : Bio::LiveSeq::Chain::is_downstream($chainref,$firstlabel,$secondlabel)
|
|
834 Function: checks if SECONDlabel follows FIRSTlabel
|
|
835 It runs downstream the elements of the chain from FIRST searching
|
|
836 for SECOND.
|
|
837 Returns : 1 if SECOND is found /after/ FIRST; 0 otherwise (i.e. if it
|
|
838 reaches the end of the chain without having found it)
|
|
839 Errorcode -1
|
|
840 Args : two labels (integer)
|
|
841
|
|
842 =cut
|
|
843
|
|
844 sub is_downstream {
|
|
845 _is_updownstream("down",@_);
|
|
846 }
|
|
847
|
|
848 =head2 is_upstream
|
|
849
|
|
850 Title : is_upstream
|
|
851 Usage : Bio::LiveSeq::Chain::is_upstream($chainref,$firstlabel,$secondlabel)
|
|
852 Function: checks if SECONDlabel follows FIRSTlabel
|
|
853 It runs upstream the elements of the chain from FIRST searching
|
|
854 for SECOND.
|
|
855 Returns : 1 if SECOND is found /after/ FIRST; 0 otherwise (i.e. if it
|
|
856 reaches the end of the chain without having found it)
|
|
857 Errorcode -1
|
|
858 Args : two labels (integer)
|
|
859
|
|
860 =cut
|
|
861
|
|
862 sub is_upstream {
|
|
863 _is_updownstream("up",@_);
|
|
864 }
|
|
865
|
|
866 =head2 check_chain
|
|
867
|
|
868 Title : check_chain
|
|
869 Usage : @errorcodes = Bio::LiveSeq::Chain::check_chain()
|
|
870 Function: a wraparound to a series of check for consistency of the chain
|
|
871 It will check for boundaries, size, backlinking and forwardlinking
|
|
872 Returns : array of 4 warn codes, each can be 1 (all ok) or 0 (something wrong)
|
|
873 Errorcode: 0
|
|
874 Args : none
|
|
875 Note : this is slow and through. It is not really needed. It is mostly
|
|
876 a code-developer tool.
|
|
877
|
|
878 =cut
|
|
879
|
|
880 sub check_chain {
|
|
881 my $chain=$_[0];
|
|
882 unless($chain) {
|
|
883 warn ("Warning check_chain: no chain input"); return (-1); }
|
|
884 my ($warnbound,$warnsize,$warnbacklink,$warnforlink);
|
|
885 $warnbound=&_boundcheck; # passes on the arguments of the subroutine
|
|
886 $warnsize=&_sizecheck;
|
|
887 $warnbacklink=&_downlinkcheck;
|
|
888 $warnforlink=&_uplinkcheck;
|
|
889 return ($warnbound,$warnsize,$warnbacklink,$warnforlink);
|
|
890 }
|
|
891
|
|
892 # consistency check for forwardlinks walking upstream
|
|
893 # argument: a chain reference
|
|
894 # returns: 1 all OK 0 problems
|
|
895 sub _uplinkcheck {
|
|
896 _updownlinkcheck("up",@_);
|
|
897 }
|
|
898
|
|
899 # consistency check for backlinks walking downstream
|
|
900 # argument: a chain reference
|
|
901 # returns: 1 all OK 0 problems
|
|
902 sub _downlinkcheck {
|
|
903 _updownlinkcheck("down",@_);
|
|
904 }
|
|
905
|
|
906 # consistency check for links, common to _uplinkcheck and _downlinkcheck
|
|
907 # argument: "up"||"down", check_ref
|
|
908 # returns: 1 all OK 0 problems
|
|
909 sub _updownlinkcheck {
|
|
910 my $direction=$_[0] || "down"; # defaults to downstream
|
|
911 my ($flow,$wolf);
|
|
912 my $chain=$_[1];
|
|
913 unless($chain) {
|
|
914 warn ("Warning _${direction}linkcheck: no chain input"); return (0); }
|
|
915 my $begin=$chain->{'begin'}; # the name of the first element
|
|
916 my $end=$chain->{'end'}; # the name of the last element
|
|
917 my ($label,@array,$me,$it,$itpoints);
|
|
918 if ($direction eq "up") {
|
|
919 $flow=2; # used to determine the direction of chain navigation
|
|
920 $wolf=1;
|
|
921 $label=$end; # start from end
|
|
922 } else {
|
|
923 $flow=1; # used to determine the direction of chain navigation
|
|
924 $wolf=2;
|
|
925 $label=$begin; # start from beginning
|
|
926 }
|
|
927 my $warncode=1;
|
|
928
|
|
929 while ($label) { # proceed with linked elements, checking neighbours
|
|
930 $me=$label;
|
|
931 @array=@{$chain->{$label}};
|
|
932 $label = $array[$flow]; # go to the next one
|
|
933 $it=$label;
|
|
934 if ($it) { # no sense in checking if next one not defined (END element)
|
|
935 @array=@{$chain->{$label}};
|
|
936 $itpoints=$array[$wolf];
|
|
937 unless ($me==$itpoints) {
|
|
938 warn "Warning: ${direction}LinkCheck: LINK wrong in $it, that doesn't point back to me ($me). It points to $itpoints\n";
|
|
939 $warncode=0;
|
|
940 }
|
|
941 }
|
|
942 }
|
|
943 return $warncode;
|
|
944 }
|
|
945
|
|
946 # consistency check for size of chain
|
|
947 # argument: a chain reference
|
|
948 # returns: 1 all OK 0 wrong size
|
|
949 sub _sizecheck {
|
|
950 my $chain=$_[0];
|
|
951 unless($chain) {
|
|
952 warn ("Warning _sizecheck: no chain input"); return (0); }
|
|
953 my $begin=$chain->{'begin'}; # the name of the first element
|
|
954 my $warncode=1;
|
|
955 my ($label,@array);
|
|
956 my $size=$chain->{'size'};
|
|
957 my $count=0;
|
|
958 $label=$begin;
|
|
959 while ($label) { # proceed with linked elements, counting
|
|
960 @array=@{$chain->{$label}};
|
|
961 $label = $array[1]; # go to the next one
|
|
962 $count++;
|
|
963 }
|
|
964 if ($size != $count) {
|
|
965 warn "Size check reports error: assumed size: $size, real size: $count ";
|
|
966 $warncode=0;
|
|
967 }
|
|
968 return $warncode;
|
|
969 }
|
|
970
|
|
971
|
|
972 # consistency check for begin and end (boundaries)
|
|
973 # argument: a chain reference
|
|
974 # returns: 1 all OK 0 problems
|
|
975 sub _boundcheck {
|
|
976 my $chain=$_[0];
|
|
977 unless($chain) {
|
|
978 warn ("Warning _boundcheck: no chain input"); return (0); }
|
|
979 my $begin=$chain->{'begin'}; # the name of the first element
|
|
980 my $end=$chain->{'end'}; # the name of the (supposedly) last element
|
|
981 my $warncode=1;
|
|
982
|
|
983 # check SYNC of beginning
|
|
984 if (($begin)&&($chain->{$begin})) { # if the BEGIN points to existing element
|
|
985 if ($chain->{$begin}[2]) { # if BEGIN element has PREV not undef
|
|
986 warn "Warning: BEGIN element has PREV field defined \n";
|
|
987 warn "\tWDEBUG begin: $begin\t";
|
|
988 warn "\tWDEBUG begin's PREV: $chain->{$begin}[2] \n";
|
|
989 $warncode=0;
|
|
990 }
|
|
991 } else {
|
|
992 warn "Warning: BEGIN key of chain does not point to existing element!\n";
|
|
993 warn "\tWDEBUG begin: $begin\n";
|
|
994 $warncode=0;
|
|
995 }
|
|
996 # check SYNC of end
|
|
997 if (($end)&&($chain->{$end})) { # if the END points to an existing element
|
|
998 if ($chain->{$end}[1]) { # if END element has NEXT not undef
|
|
999 warn "Warning: END element has NEXT field defined \n";
|
|
1000 warn "\tWDEBUG end: $end\t";
|
|
1001 warn "\tWDEBUG end's NEXT: $chain->{$end}[1] \n";
|
|
1002 $warncode=0;
|
|
1003 }
|
|
1004 } else {
|
|
1005 warn "Warning: END key of chain does not point to existing element!\n";
|
|
1006 warn "\tWDEBUG end: $end\n";
|
|
1007 $warncode=0;
|
|
1008 }
|
|
1009 return $warncode;
|
|
1010 }
|
|
1011
|
|
1012 # arguments: chain_ref
|
|
1013 # returns: the size of the chain (the number of elements)
|
|
1014 # return code -1: unexistant chain, errors...
|
|
1015 sub chain_length {
|
|
1016 my $chain=$_[0];
|
|
1017 unless($chain) {
|
|
1018 warn ("Warning chain_length: no chain input"); return (-1); }
|
|
1019 my $size=$chain->{'size'};
|
|
1020 if ($size) {
|
|
1021 return ($size);
|
|
1022 } else {
|
|
1023 return (-1);
|
|
1024 }
|
|
1025 }
|
|
1026
|
|
1027 # arguments: chain ref, first element name, second element name
|
|
1028 # returns: 1 or 0 (1 ok, 0 errors)
|
|
1029 sub _join_chain_elements {
|
|
1030 my $chain=$_[0];
|
|
1031 unless($chain) {
|
|
1032 warn ("Warning _join_chain_elements: no chain input"); return (0); }
|
|
1033 my $leftelem=$_[1];
|
|
1034 my $rightelem=$_[2];
|
|
1035 unless(($leftelem)&&($rightelem)) {
|
|
1036 warn ("Warning _join_chain_elements: element arguments??"); return (0); }
|
|
1037 if (($chain->{$leftelem})&&($chain->{$rightelem})) { # if the elements exist
|
|
1038 $chain->{$leftelem}[1]=$rightelem;
|
|
1039 $chain->{$rightelem}[2]=$leftelem;
|
|
1040 return 1;
|
|
1041 } else {
|
|
1042 warn ("Warning _join_chain_elements: elements not defined");
|
|
1043 return 0;
|
|
1044 }
|
|
1045 }
|
|
1046
|
|
1047 =head2 splice_chain
|
|
1048
|
|
1049 Title : splice_chain
|
|
1050 Usage : @errorcodes = Bio::LiveSeq::Chain::splice_chain($chainref,$first,$length,$last)
|
|
1051 Function: removes the elements designated by FIRST and LENGTH from a chain.
|
|
1052 The chain shrinks accordingly. If LENGTH is omitted, removes
|
|
1053 everything from FIRST onward.
|
|
1054 If END is specified, LENGTH is ignored and instead the removal
|
|
1055 occurs from FIRST to LAST.
|
|
1056 Returns : the elements removed as a string
|
|
1057 Errorcode: -1
|
|
1058 Args : chainref, integer, integer, integer
|
|
1059
|
|
1060 =cut
|
|
1061
|
|
1062 sub splice_chain {
|
|
1063 my $chain=$_[0];
|
|
1064 unless($chain) {
|
|
1065 warn ("Warning splice_chain: no chain input"); return (-1); }
|
|
1066 my $begin=$chain->{'begin'}; # the name of the first element
|
|
1067 my $end=$chain->{'end'}; # the name of the (supposedly) last element
|
|
1068 my $first=$_[1];
|
|
1069 unless (($first eq 0)||($first)) { $first=$begin; } # if undef, use $begin
|
|
1070 my $len=$_[2];
|
|
1071 my $last=$_[3];
|
|
1072 my (@array, $string);
|
|
1073 my ($beforecut,$aftercut);
|
|
1074
|
|
1075 unless($chain->{$first}) {
|
|
1076 warn ("Warning splice_chain: first element not defined"); return (-1); }
|
|
1077 if ($last) { # if last is defined, it gets priority and len is not used
|
|
1078 unless($chain->{$last}) {
|
|
1079 warn ("Warning splice_chain: last element not defined"); return (-1); }
|
|
1080 if ($len) {
|
|
1081 warn ("Warning splice_chain: argument LAST:$last overriding LEN:$len!");
|
|
1082 undef $len;
|
|
1083 }
|
|
1084 } else {
|
|
1085 $last=$end; # if last not defined, go 'till end (or to len, whichever 1st)
|
|
1086 }
|
|
1087
|
|
1088 $beforecut=$chain->{$first}[2]; # what's the element before 1st deleted?
|
|
1089 # if it is undef then it means we are splicing since the beginning
|
|
1090
|
|
1091 my $i=1;
|
|
1092 my $label=$first;
|
|
1093 my $afterlast=$chain->{$last}[1]; # if $last=$end $afterlast should be undef
|
|
1094 unless (defined $afterlast) { $afterlast=0; } # keep strict happy
|
|
1095
|
|
1096 # proceed for len elements or until the end, whichever comes first
|
|
1097 # if len undef goes till last
|
|
1098 while (($label)&&($label != $afterlast) && ($i <= ($len || $i + 1))) {
|
|
1099 @array=@{$chain->{$label}};
|
|
1100 $string .= $array[0];
|
|
1101 $aftercut = $array[1]; # what's the element next last deleted?
|
|
1102 # also used as savevar to change label posdeletion
|
|
1103 delete $chain->{$label}; # this can be deleted now
|
|
1104 $label=$aftercut; # label is updated using the savevar
|
|
1105 $i++;
|
|
1106 }
|
|
1107
|
|
1108 # Now fix the chain (sticky edges, fields)
|
|
1109 # 4 cases: cut in the middle, cut from beginning, cut till end, cut all
|
|
1110 #print "\n\tstickyDEBUG beforecut: $beforecut "; # DEBUG
|
|
1111 #print "\taftercut: $aftercut \n"; # DEBUG
|
|
1112 if ($beforecut) {
|
|
1113 if ($aftercut) { # 1st case, middle cut
|
|
1114 _join_chain_elements($chain,$beforecut,$aftercut);
|
|
1115 } else { # 3rd case, end cut
|
|
1116 $chain->{'end'}=$beforecut; # update the END field
|
|
1117 $chain->{$beforecut}[1]=undef; # since we cut till the end
|
|
1118 }
|
|
1119 } else {
|
|
1120 if ($aftercut) { # 2nd case, begin cut
|
|
1121 $chain->{'begin'}=$aftercut; # update the BEGIN field
|
|
1122 $chain->{$aftercut}[2]=undef; # since we cut from beginning
|
|
1123 } else { # 4th case, all has been cut
|
|
1124 $chain->{'begin'}=undef;
|
|
1125 $chain->{'end'}=undef;
|
|
1126 }
|
|
1127 }
|
|
1128 $chain->{'size'}=($chain->{'size'}) - $i + 1; # update the SIZE field
|
|
1129
|
|
1130 return $string;
|
|
1131 }
|
|
1132
|
|
1133
|
|
1134 # arguments: CHAIN_REF POSITION [FIRST]
|
|
1135 # returns: element counting POSITION from FIRST or from START if FIRST undef
|
|
1136 # i.e. returns the element at POSITION counting from FIRST
|
|
1137 #sub element_at_pos {
|
|
1138 #croak "Warning: old method name. Please update code to 'down_get_label_at_position'\n";
|
|
1139 ##&down_element_at_pos;
|
|
1140 #}
|
|
1141 #sub up_element_at_pos {
|
|
1142 ## old wraparound
|
|
1143 ##my @array=up_chain2string($_[0],$_[2],$_[1],undef,"elements");
|
|
1144 ##return $array[-1];
|
|
1145 #croak "old method name. Update code to: up_get_label_at_position";
|
|
1146 ##&up_get_label_at_pos;
|
|
1147 #}
|
|
1148 #sub down_element_at_pos {
|
|
1149 ## old wraparound
|
|
1150 ##my @array=down_chain2string($_[0],$_[2],$_[1],undef,"elements");
|
|
1151 ##return $array[-1];
|
|
1152 #croak "old method name. Update code to: down_get_label_at_position";
|
|
1153 ##&down_get_label_at_pos;
|
|
1154 #}
|
|
1155
|
|
1156 # arguments: CHAIN_REF ELEMENT [FIRST]
|
|
1157 # returns: the position of ELEMENT counting from FIRST or from START
|
|
1158 #i if FIRST is undef
|
|
1159 # i.e. returns the Number of elements between FIRST and ELEMENT
|
|
1160 # i.e. returns the position of element taking FIRST as 1 of coordinate system
|
|
1161 #sub pos_of_element {
|
|
1162 #croak ("Warning: old and ambiguous method name. Please update code to 'down_get_pos_of_label'\n");
|
|
1163 ##&down_pos_of_element;
|
|
1164 #}
|
|
1165 #sub up_pos_of_element {
|
|
1166 #croak ("Warning: old method name. Please update code to 'up_get_pos_of_label'\n");
|
|
1167 ##up_chain2string($_[0],$_[2],undef,$_[1],"counting");
|
|
1168 #}
|
|
1169 #sub down_pos_of_element {
|
|
1170 #croak ("Warning: old method name. Please update code to 'down_get_pos_of_label'\n");
|
|
1171 ##down_chain2string($_[0],$_[2],undef,$_[1],"counting");
|
|
1172 #}
|
|
1173
|
|
1174 # wraparounds to calculate length of subchain from first to last
|
|
1175 # arguments: chain_ref [first] [last]
|
|
1176 #sub subchain_length {
|
|
1177 #croak "Warning: old method name. Please update code to 'down_subchain_length'\n";
|
|
1178 ##&down_subchain_length;
|
|
1179 #}
|
|
1180
|
|
1181 # wraparounds to have elements output
|
|
1182 # same arguments as chain2string
|
|
1183 # returns label|name of every element
|
|
1184 #sub elements {
|
|
1185 #croak ("Warning: method no more supported. Please update code to 'down_labels' (NB: now it returns ref to array and doesn't allow length argument!)\n");
|
|
1186 ##&down_elements;
|
|
1187 #}
|
|
1188 #sub up_elements {
|
|
1189 #croak ("Warning: method no more supported. Please update code to 'up_labels' (NB: now it returns ref to array and doesn't allow length argument!)\n");
|
|
1190 ##up_chain2string($_[0],$_[1],$_[2],$_[3],"elements");
|
|
1191 #}
|
|
1192 #sub down_elements {
|
|
1193 #croak ("Warning: method no more supported. Please update code to 'down_labels' (NB: now it returns ref to array and doesn't allow length argument!)\n");
|
|
1194 ##down_chain2string($_[0],$_[1],$_[2],$_[3],"elements");
|
|
1195 #}
|
|
1196
|
|
1197 # wraparounds to have verbose output
|
|
1198 # same arguments as chain2string
|
|
1199 # returns the chain in a very verbose way
|
|
1200 sub chain2string_verbose {
|
|
1201 carp "Warning: method no more supported.\n";
|
|
1202 &old_down_chain2string_verbose;
|
|
1203 }
|
|
1204 sub up_chain2string_verbose {
|
|
1205 carp "Warning: method no more supported.\n";
|
|
1206 old_up_chain2string($_[0],$_[1],$_[2],$_[3],"verbose");
|
|
1207 }
|
|
1208 sub down_chain2string_verbose {
|
|
1209 carp "Warning: method no more supported.\n";
|
|
1210 old_down_chain2string($_[0],$_[1],$_[2],$_[3],"verbose");
|
|
1211 }
|
|
1212
|
|
1213 #sub chain2string {
|
|
1214 #croak ("Warning: old method name. Please update code to 'down_chain2string'\n");
|
|
1215 ##&down_chain2string;
|
|
1216 #}
|
|
1217 sub old_up_chain2string {
|
|
1218 old_updown_chain2string("up",@_);
|
|
1219 }
|
|
1220 sub old_down_chain2string {
|
|
1221 old_updown_chain2string("down",@_);
|
|
1222 }
|
|
1223
|
|
1224 # common to up_chain2string and down_chain2string
|
|
1225 # arguments: "up"||"down" chain_ref [first] [len] [last] [option]
|
|
1226 # [option] can be any of "verbose", "counting", "elements"
|
|
1227 # error: return -1
|
|
1228 # defaults: start = first element; if len undef, goes to last
|
|
1229 # if last undef, goes to end
|
|
1230 # if last def it overrides len (that gets undef)
|
|
1231 # returns: a string
|
|
1232 # example usage: down_chain2string($chain) -> all the chain from begin to end
|
|
1233 # example usage: down_chain2string($chain,6) -> from 6 to the end
|
|
1234 # example usage: down_chain2string($chain,6,4) -> from 6, going on 4 elements
|
|
1235 # example usage: down_chain2string($chain,6,"",10) -> from 6 to 10
|
|
1236 # example usage: up_chain2string($chain,10,"",6) -> from 10 to 6 upstream
|
|
1237 sub old_updown_chain2string {
|
|
1238 my ($direction,$chain,$first,$len,$last,$option)=@_;
|
|
1239 unless($chain) {
|
|
1240 warn ("Warning chain2string: no chain input"); return (-1); }
|
|
1241 my $begin=$chain->{'begin'}; # the name of the BEGIN element
|
|
1242 my $end=$chain->{'end'}; # the name of the END element
|
|
1243 my $flow;
|
|
1244 if ($direction eq "up") {
|
|
1245 $flow=2; # used to determine the direction of chain navigation
|
|
1246 unless ($first) { $first=$end; } # if undef or 0, use $end
|
|
1247 } else { # defaults to "down"
|
|
1248 $flow=1; # used to determine the direction of chain navigation
|
|
1249 unless ($first) { $first=$begin; } # if undef or 0, use $begin
|
|
1250 }
|
|
1251
|
|
1252 unless($chain->{$first}) {
|
|
1253 warn ("Warning chain2string: first element not defined"); return (-1); }
|
|
1254 if ($last) { # if last is defined, it gets priority and len is not used
|
|
1255 unless($chain->{$last}) {
|
|
1256 warn ("Warning chain2string: last element not defined"); return (-1); }
|
|
1257 if ($len) {
|
|
1258 warn ("Warning chain2string: argument LAST:$last overriding LEN:$len!");
|
|
1259 undef $len;
|
|
1260 }
|
|
1261 } else {
|
|
1262 if ($direction eq "up") {
|
|
1263 $last=$begin; # if last not defined, go 'till begin (or upto len elements)
|
|
1264 } else {
|
|
1265 $last=$end; # if last not defined, go 'till end (or upto len elements)
|
|
1266 }
|
|
1267 }
|
|
1268 my (@array, $string, $count);
|
|
1269 # call for verbosity (by way of chain2string_verbose);
|
|
1270 my $verbose=0; my $elements=0; my @elements; my $counting=0;
|
|
1271 if ($option) { # keep strict happy
|
|
1272 if ($option eq "verbose") { $verbose=1; }
|
|
1273 if ($option eq "elements") { $elements=1; }
|
|
1274 if ($option eq "counting") { $counting=1; }
|
|
1275 }
|
|
1276
|
|
1277 if ($verbose) {
|
|
1278 print "BEGIN=$begin"; print " END=$end"; print " SIZE=$chain->{'size'}";
|
|
1279 print " FIRSTFREE=$chain->{'firstfree'} \n";
|
|
1280 }
|
|
1281
|
|
1282 my $i=1;
|
|
1283 my $label=$first;
|
|
1284 my $afterlast=$chain->{$last}[$flow]; # if $last=$end $afterlast should be undef
|
|
1285 unless (defined $afterlast) { $afterlast=0; } # keep strict happy
|
|
1286
|
|
1287 # proceed for len elements or until last, whichever comes first
|
|
1288 # if $len undef goes till end
|
|
1289 while (($label)&&($label != $afterlast) && ($i <= ($len || $i + 1))) {
|
|
1290 @array=@{$chain->{$label}};
|
|
1291 if ($verbose) {
|
|
1292 $string .= "$array[2]_${label}_$array[1]=$array[0] ";
|
|
1293 $count++;
|
|
1294 } elsif ($elements) {
|
|
1295 push (@elements,$label); # returning element names/references/identifiers
|
|
1296 } elsif ($counting) {
|
|
1297 $count++;
|
|
1298 } else {
|
|
1299 $string .= $array[0]; # returning element content
|
|
1300 }
|
|
1301 $label = $array[$flow]; # go to next||prev i.e. downstream||upstream
|
|
1302 $i++;
|
|
1303 }
|
|
1304 #DEBUG#print "len: $len, first: $first, last: $last, afterlast=$afterlast \n";
|
|
1305 if ($verbose) { print "TOTALprinted: $count\n"; }
|
|
1306 if ($counting) {
|
|
1307 return $count;
|
|
1308 } elsif ($elements) {
|
|
1309 return @elements;
|
|
1310 } else {
|
|
1311 return $string;
|
|
1312 }
|
|
1313 }
|
|
1314
|
|
1315 # sub string2schain
|
|
1316 # --------> deleted, no more supported <--------
|
|
1317 # creation of a single linked list/chain from a string
|
|
1318 # basically could be recreated by taking the *2chain methods and
|
|
1319 # omitting to set the 3rd field (label 2) containing the back links
|
|
1320
|
|
1321
|
|
1322 # creation of a double linked list/chain from a string
|
|
1323 # returns reference to a hash containing the chain
|
|
1324 # arguments: STRING [OFFSET]
|
|
1325 # defaults: OFFSET defaults to 1 if undef
|
|
1326 # the chain will contain as elements the single characters in the string
|
|
1327 sub string2chain {
|
|
1328 my @string=split(//,$_[0]);
|
|
1329 array2chain(\@string,$_[1]);
|
|
1330 }
|
|
1331
|
|
1332 =head2 array2chain
|
|
1333
|
|
1334 Title : array2chain
|
|
1335 Usage : $chainref = Bio::LiveSeq::Chain::array2chain($arrayref,$offset)
|
|
1336 Function: creation of a double linked chain from an array
|
|
1337 Returns : reference to a hash containing the chain
|
|
1338 Defaults: OFFSET defaults to 1 if undef
|
|
1339 Error code: 0
|
|
1340 Args : a reference to an array containing the elements to be chainlinked
|
|
1341 an optional integer > 0 (this will be the starting count for
|
|
1342 the chain labels instead than having them begin from "1")
|
|
1343
|
|
1344 =cut
|
|
1345
|
|
1346 sub array2chain {
|
|
1347 my $arrayref=$_[0];
|
|
1348 my $array_count=scalar(@{$arrayref});
|
|
1349 unless ($array_count) {
|
|
1350 warn ("Warning array2chain: no elements input"); return (0); }
|
|
1351 my $begin=$_[1];
|
|
1352 if (defined $begin) {
|
|
1353 if ($begin < 1) {
|
|
1354 warn "Warning array2chain: Zero or Negative offsets not allowed"; return (0); }
|
|
1355 } else {
|
|
1356 $begin=1;
|
|
1357 }
|
|
1358 my ($element,%hash);
|
|
1359 $hash{'begin'}=$begin;
|
|
1360 my $i=$begin-1;
|
|
1361 foreach $element (@{$arrayref}) {
|
|
1362 $i++;
|
|
1363 # hash with keys begin..end pointing to the arrays
|
|
1364 $hash{$i}=[$element,$i+1,$i-1];
|
|
1365 }
|
|
1366 my $end=$i;
|
|
1367 $hash{'end'}=$end;
|
|
1368 $hash{firstfree}=$i+1; # what a new added element should be called
|
|
1369 $hash{size}=$end-$begin+1; # how many elements in the chain
|
|
1370
|
|
1371 # eliminate pointers to unexisting elements
|
|
1372 $hash{$begin}[2]=undef;
|
|
1373 $hash{$end}[1]=undef;
|
|
1374
|
|
1375 return (\%hash);
|
|
1376 }
|
|
1377
|
|
1378 1; # returns 1
|