Mercurial > repos > deepakjadmin > mayatool3_test3
comparison mayachemtools/lib/TextUtil.pm @ 0:73ae111cf86f draft
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 11:55:01 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:73ae111cf86f |
---|---|
1 package TextUtil; | |
2 # | |
3 # $RCSfile: TextUtil.pm,v $ | |
4 # $Date: 2015/03/22 20:08:26 $ | |
5 # $Revision: 1.45 $ | |
6 # | |
7 # Author: Manish Sud <msud@san.rr.com> | |
8 # | |
9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
10 # | |
11 # This file is part of MayaChemTools. | |
12 # | |
13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
14 # the terms of the GNU Lesser General Public License as published by the Free | |
15 # Software Foundation; either version 3 of the License, or (at your option) any | |
16 # later version. | |
17 # | |
18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
19 # any warranty; without even the implied warranty of merchantability of fitness | |
20 # for a particular purpose. See the GNU Lesser General Public License for more | |
21 # details. | |
22 # | |
23 # You should have received a copy of the GNU Lesser General Public License | |
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
26 # Boston, MA, 02111-1307, USA. | |
27 # | |
28 | |
use strict;
use Exporter;
use Text::ParseWords;
31 | |
32 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); | |
33 | |
34 @ISA = qw(Exporter); | |
35 @EXPORT = qw(AddNumberSuffix ContainsWhiteSpaces GetTextLine GetTextFileDataByUniqueKey GetTextFileDataByNonUniqueKey HashCode IsEmpty IsNumberPowerOfNumber IsInteger IsPositiveInteger IsFloat IsNotEmpty IsNumerical JoinWords SplitWords QuoteAWord RemoveLeadingWhiteSpaces RemoveTrailingWhiteSpaces RemoveLeadingAndTrailingWhiteSpaces WrapText); | |
36 @EXPORT_OK = qw(); | |
37 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); | |
38 | |
# Append an English ordinal suffix (st, nd, rd or th) to a positive integer.
#
# Parameters:
#   $Value - Value to decorate.
#
# Returns:
#   The value followed by its ordinal suffix: 1st, 2nd, 3rd, 4th, 11th, 21st,
#   112th and so on. Any value which IsPositiveInteger rejects is returned
#   unchanged.
#
sub AddNumberSuffix {
  my($Value) = @_;

  if (!IsPositiveInteger($Value)) {
    return $Value;
  }

  # Values ending in 11, 12 or 13 take "th" in every hundred (11th, 112th,
  # 1013th), so the last two digits are checked before the last one...
  my $LastTwoDigits = $Value % 100;
  my $Suffix = "th";
  if ($LastTwoDigits < 11 || $LastTwoDigits > 13) {
    my $LastDigit = $Value % 10;
    $Suffix = ($LastDigit == 1) ? "st"
            : ($LastDigit == 2) ? "nd"
            : ($LastDigit == 3) ? "rd"
            :                     "th";
  }
  return "${Value}${Suffix}";
}
56 | |
# Check out the string: Does it contain any white space characters?
#
# White space means a space, tab, carriage return, new line or form feed.
#
# Returns 1 when at least one such character is present; 0 otherwise, which
# includes undefined and empty strings.
#
sub ContainsWhiteSpaces {
  my($TheString) = @_;

  return 0 unless defined $TheString;
  return 0 unless length $TheString;

  return 1 if $TheString =~ /[ \t\r\n\f]/;
  return 0;
}
67 | |
# Read the next non-empty line from an open file handle, convert Windows and
# Mac new line characters to UNIX ones, and take out the trailing new line.
#
# Parameters:
#   $TextFileRef - Reference to an already opened file handle.
#
# Returns:
#   The next line with a non zero length, without its trailing new line. An
#   empty string is returned once the end of the file is reached.
#
sub GetTextLine {
  my($TextFileRef) = @_;
  my($Line) = '';

  # Read into a lexical instead of $_: the global $_ belongs to the caller and
  # might be aliased to data it still needs...
  LINE: while (defined(my $RawLine = <$TextFileRef>)) {
    $Line = $RawLine;

    # Change Windows and Mac new line char to UNIX...
    $Line =~ s/(\r\n)|(\r)/\n/g;

    # Take out any new line char at the end by explicitly removing it instead of
    # using chomp, which might not always work correctly on files generated on a
    # system with a value of input line separator different from the current
    # system...
    $Line =~ s/\n$//g;

    # Doesn't hurt to chomp...
    chomp $Line;

    if (length $Line) {
      last LINE;
    }
  }
  return $Line;
}
93 | |
# Load data from a delimited text file into the specified hash reference, using
# the values in one column as unique data keys.
#
# All the work is done by _GetTextFileData, which is called in "UniqueKey" mode
# with the arguments passed through unchanged.
#
# Parameters:
#   $TextDataFile    - Name of the text file to load.
#   $TextDataMapRef  - Reference to the hash to fill; it is emptied first.
#   $DataKeyColNum   - Number of the data key column, starting from 1; the
#                      first column is used when it is not specified.
#   $InDelim         - Input delimiter; handed over to _GetTextFileData.
#
# File layout:
#   . Lines starting with # are treated as comments and ignored.
#   . The first line not starting with # must contain the column labels.
#   . Data rows whose number of columns doesn't match the number of column
#     labels are ignored with a warning.
#   . Rows whose data key is already present are ignored with a warning.
#
# The format of the data structure loaded into the hash reference is:
#
# @{$TextDataMapRef->{DataKeys}} - Array of unique data keys
# @{$TextDataMapRef->{ColLabels}} - Array of column labels
# @{$TextDataMapRef->{DataColIDs}} - Array of data column IDs
# $TextDataMapRef->{NumOfCols} - Number of columns
# %{$TextDataMapRef->{DataKey}} - Hash keys pair: <DataKey, DataKey>
# %{$TextDataMapRef->{DataCol<Num>}} - Hash keys pair: <DataCol<Num>, DataKey>
#
# Caveats:
#   . The column numbers start from 1.
#   . The data key column itself is not loaded under a DataCol<Num> hash key.
#
sub GetTextFileDataByUniqueKey {
  my($TextDataFile, $TextDataMapRef, $DataKeyColNum, $InDelim) = @_;

  my @LoaderArgs = ("UniqueKey", $TextDataFile, $TextDataMapRef, $DataKeyColNum, $InDelim);

  return _GetTextFileData(@LoaderArgs);
}
126 | |
# Load data from a delimited text file into the specified hash reference, using
# the values in one column as non-unique data keys.
#
# All the work is done by _GetTextFileData, which is called in "NonUniqueKey"
# mode with the arguments passed through unchanged.
#
# Parameters:
#   $TextDataFile    - Name of the text file to load.
#   $TextDataMapRef  - Reference to the hash to fill; it is emptied first.
#   $DataKeyColNum   - Number of the data key column, starting from 1; the
#                      first column is used when it is not specified.
#   $InDelim         - Input delimiter; handed over to _GetTextFileData.
#
# File layout:
#   . Lines starting with # are treated as comments and ignored.
#   . The first line not starting with # must contain the column labels.
#   . Data rows whose number of columns doesn't match the number of column
#     labels are ignored with a warning.
#
# The format of the data structure loaded into the hash reference is:
#
# @{$TextDataMapRef->{DataKeys}} - Array of unique data keys
# @{$TextDataMapRef->{ColLabels}} - Array of column labels
# @{$TextDataMapRef->{DataColIDs}} - Array of data column IDs
# $TextDataMapRef->{NumOfCols} - Number of columns
# %{$TextDataMapRef->{DataKey}} - Hash keys pair: <DataKey, DataKey>
# @{$TextDataMapRef->{DataCol<Num>}{DataKey}} - Array of column values collected
#                                               from all the rows sharing the key
#
# Caveats:
#   . The column numbers start from 1.
#   . The data key column itself is not loaded under a DataCol<Num> hash key.
#
sub GetTextFileDataByNonUniqueKey {
  my($TextDataFile, $TextDataMapRef, $DataKeyColNum, $InDelim) = @_;

  my @LoaderArgs = ("NonUniqueKey", $TextDataFile, $TextDataMapRef, $DataKeyColNum, $InDelim);

  return _GetTextFileData(@LoaderArgs);
}
159 | |
# Load text file data using a unique or non-unique data key column...
#
# Parameters:
#   $DataKeyMode     - "UniqueKey" (case insensitive) stores one value per data
#                      key and column; any other value collects the values of
#                      all the rows sharing a data key into arrays.
#   $TextDataFile    - Name of the text file to load.
#   $TextDataMapRef  - Reference to the hash to fill; it is emptied first.
#   $DataKeyColNum   - Number of the data key column, starting from 1; the
#                      first column is used when it is not defined.
#   $InDelim         - "semicolon" (case insensitive) selects a semicolon as
#                      the delimiter; anything else selects a comma. It is
#                      ignored for files with a .tsv extension, which are
#                      always split on tabs.
#
# Returns:
#   The status of closing the file after a successful load. Nothing is returned
#   when the data key column number is not valid for the file.
#
# Dies when the file can't be opened.
#
sub _GetTextFileData {
  my($DataKeyMode, $TextDataFile, $TextDataMapRef, $DataKeyColNum, $InDelim) = @_;

  print "\nProcessing text data file $TextDataFile...\n";

  my $UniqueDataKeyMode = ($DataKeyMode =~ /^UniqueKey$/i) ? 1 : 0;

  # Setup default values...
  $DataKeyColNum = defined $DataKeyColNum ? $DataKeyColNum : 1;

  # Pick the delimiter from the file extension first and the requested
  # delimiter name next; comma is the fallback...
  my $Delim;
  if ($TextDataFile =~ /\.tsv$/i) {
    $Delim = "\t";
  }
  elsif (defined($InDelim) && $InDelim =~ /^semicolon$/i) {
    $Delim = "\;";
  }
  else {
    $Delim = "\,";
  }

  my($LineCount, $IgnoredLineCount) = (0) x 2;

  open(my $TextDataFh, '<', $TextDataFile) or die "Couldn't open $TextDataFile: $! ...";

  # Skip lines up to column labels. GetTextLine returns an empty string at EOF,
  # so the length is tested instead of the truth value: a line containing just
  # 0 is valid data...
  my $Line = '';
  LINE: while (length($Line = TextUtil::GetTextLine($TextDataFh))) {
    $LineCount++;
    if ($Line =~ /^#/) {
      $IgnoredLineCount++;
      next LINE;
    }
    last LINE;
  }

  # Initialize data map...
  %{$TextDataMapRef} = ();
  @{$TextDataMapRef->{DataKeys}} = ();
  @{$TextDataMapRef->{ColLabels}} = ();
  @{$TextDataMapRef->{DataColIDs}} = ();
  $TextDataMapRef->{NumOfCols} = undef;

  # Process column labels...
  my @ColLabels = quotewords($Delim, 0, $Line);
  my $NumOfCols = @ColLabels;

  if ($DataKeyColNum < 1 || $DataKeyColNum > $NumOfCols) {
    warn "Warning: Ignoring text data file $TextDataFile: Invalid data key column number, $DataKeyColNum, specified. It must be > 0 and <= $NumOfCols, number of columns in the text file ...";
    # Don't leave the file open behind...
    close $TextDataFh;
    return;
  }
  my $DataKeyColIndex = $DataKeyColNum - 1;

  $TextDataMapRef->{NumOfCols} = $NumOfCols;
  push @{$TextDataMapRef->{ColLabels}}, @ColLabels;

  # Set up column data IDs for tracking the data; the data key column is left
  # out...
  my(@DataColNums, @DataColIDs);
  for my $ColNum (1 .. $NumOfCols) {
    next if $ColNum == $DataKeyColNum;
    push @DataColNums, $ColNum;
    push @DataColIDs, "DataCol${ColNum}";
  }
  push @{$TextDataMapRef->{DataColIDs}}, @DataColIDs;

  # Initialize column data hash...
  %{$TextDataMapRef->{DataKey}} = ();
  for my $ColID (@DataColIDs) {
    %{$TextDataMapRef->{$ColID}} = ();
  }

  LINE: while (length($Line = TextUtil::GetTextLine($TextDataFh))) {
    $LineCount++;
    if ($Line =~ /^#/) {
      $IgnoredLineCount++;
      next LINE;
    }

    my @LineWords = quotewords($Delim, 0, $Line);
    if (@LineWords != $NumOfCols) {
      $IgnoredLineCount++;
      # Report the number of fields, not the fields themselves...
      my $NumOfFields = @LineWords;
      warn "Warning: The number of data fields, $NumOfFields, in $TextDataFile must be $NumOfCols.\nIgnoring line number $LineCount: $Line...\n";
      next LINE;
    }
    my $DataKey = $LineWords[$DataKeyColIndex];

    if ($UniqueDataKeyMode) {
      if (exists $TextDataMapRef->{DataKey}{$DataKey}) {
        $IgnoredLineCount++;
        warn "Warning: The data key, $DataKey, in data column key number, $DataKeyColNum, is already present.\nIgnoring line number $LineCount: $Line...\n";
        next LINE;
      }
      push @{$TextDataMapRef->{DataKeys}}, $DataKey;
      $TextDataMapRef->{DataKey}{$DataKey} = $DataKey;
    }
    elsif (!exists $TextDataMapRef->{DataKey}{$DataKey}) {
      # First row for this data key: set up empty value arrays...
      push @{$TextDataMapRef->{DataKeys}}, $DataKey;
      $TextDataMapRef->{DataKey}{$DataKey} = $DataKey;

      for my $ColID (@DataColIDs) {
        @{$TextDataMapRef->{$ColID}{$DataKey}} = ();
      }
    }

    # Track column data values...
    for my $ColIndex (0 .. $#DataColNums) {
      my $ColID = $DataColIDs[$ColIndex];
      my $ColValue = $LineWords[$DataColNums[$ColIndex] - 1];

      if ($UniqueDataKeyMode) {
        $TextDataMapRef->{$ColID}{$DataKey} = $ColValue;
      }
      else {
        push @{$TextDataMapRef->{$ColID}{$DataKey}}, $ColValue;
      }
    }
  }

  print "\nTotal number of lines in file $TextDataFile: $LineCount\n";
  print "Total number of lines ignored: $IgnoredLineCount\n";

  return close($TextDataFh);
}
301 | |
# Returns an integer hash code for a string using One-at-a-time algorithm By Bob
# Jenkins [Ref 38]. It's also implemented in Perl for internal hash keys in hv.h
# include file.
#
# It's not clear how to force Perl to perform unsigned integer arithmetic
# irrespective of the OS/Platform and the value of use64bitint flag used during
# its compilation.
#
# In order to generate a consistent hash code across OS/platforms, the
# following methodology is used:
#
# o Apply MaxHashCodeMask after each left shift and each addition, so that
#   only the bits covered by the mask are ever carried forward
# o Stay away from "use integer" to avoid signed integer arithmetic for bit
#   operators
#
# MaxHashCodeMask (2147483647) corresponds to the maximum value which can be
# stored in 31 bits; consequently, the hash code returned never exceeds it.
#
my $MaxHashCodeMask = 2**31 - 1;

sub HashCode {
  my($String) = @_;
  my $Hash = 0;

  # Mix in one byte value at a time...
  for my $ByteValue (unpack('C*', $String)) {
    $Hash += $ByteValue;
    $Hash = ($Hash + (($Hash << 10) & $MaxHashCodeMask)) & $MaxHashCodeMask;
    $Hash ^= ($Hash >> 6);
  }

  # Final avalanche...
  $Hash = ($Hash + (($Hash << 3) & $MaxHashCodeMask)) & $MaxHashCodeMask;
  $Hash ^= ($Hash >> 11);
  $Hash = ($Hash + (($Hash << 15) & $MaxHashCodeMask)) & $MaxHashCodeMask;

  return $Hash;
}
363 | |
# Check out the string: Is it undefined or does it have a zero length?
#
# Returns 1 for an undefined or empty string; 0 otherwise.
#
sub IsEmpty {
  my($TheString) = @_;

  return 1 if !defined($TheString);

  return length($TheString) ? 0 : 1;
}
373 | |
# Is first specified number power of second specified number...
#
# Parameters:
#   $FirstNum   - Number to check.
#   $SecondNum  - Base.
#
# Returns:
#   1 when log(FirstNum)/log(SecondNum) is an integer according to IsInteger;
#   0 otherwise. 0 is also returned for values whose logarithm can't be taken:
#   undefined or non-positive numbers. For a base of 1, only 1 itself is
#   reported as its power.
#
sub IsNumberPowerOfNumber {
  my($FirstNum, $SecondNum) = @_;

  # log() dies for values <= 0...
  if (!(defined($FirstNum) && defined($SecondNum) && $FirstNum > 0 && $SecondNum > 0)) {
    return 0;
  }

  # log(1) is 0: avoid dividing by it...
  if ($SecondNum == 1) {
    return ($FirstNum == 1) ? 1 : 0;
  }

  my $PowerValue = log($FirstNum)/log($SecondNum);

  return IsInteger($PowerValue) ? 1 : 0;
}
383 | |
# Check out the string: Is it an integer?
#
# An integer is an optional + or - sign followed by one or more digits. Leading
# and trailing white space characters (space, tab, carriage return, new line
# and form feed) around it are ignored.
#
# Returns 1 for an integer; 0 otherwise, which includes undefined strings,
# empty strings, white space only strings and a sign without any digits.
#
sub IsInteger {
  my($TheString) = @_;

  if (!(defined($TheString) && length($TheString))) {
    return 0;
  }

  # Require at least one digit: checking just for the absence of non-digit
  # characters lets "+", "-" and blank strings through...
  return ($TheString =~ /\A[ \t\r\n\f]*[+-]?[0-9]+[ \t\r\n\f]*\z/) ? 1 : 0;
}
396 | |
# Check out the string: Is it an integer with value > 0?
#
# Returns 1 when IsInteger accepts the string and its numerical value is
# greater than zero; 0 otherwise.
#
sub IsPositiveInteger {
  my($TheString) = @_;

  return 0 unless IsInteger($TheString);

  return ($TheString > 0) ? 1 : 0;
}
406 | |
407 | |
# Check out the string: Is it a float?
#
# The following format is accepted: an optional + or - sign; digits with an
# optional decimal point, or a decimal point followed by digits; and an optional
# exponent made up of e or E, an optional sign and digits. Examples: 12, -0.25,
# .5, 1., 1e5, 1.5E-3. Leading and trailing white space characters around the
# number are ignored.
#
# Returns 1 for such a string; 0 otherwise, which includes undefined and empty
# strings.
#
sub IsFloat {
  my($TheString) = @_;

  if (!(defined($TheString) && length($TheString))) {
    return 0;
  }

  # Match the whole number format instead of just looking for invalid
  # characters: 1.2.3, 1e and .. contain valid characters only, and 1e-5
  # contains a sign after its first character...
  return ($TheString =~ /\A[ \t\r\n\f]*[+-]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][+-]?[0-9]+)?[ \t\r\n\f]*\z/) ? 1 : 0;
}
420 | |
# Check out the string: Is it defined and has a non zero length?
#
# Returns 1 for a defined string with at least one character; 0 otherwise.
#
sub IsNotEmpty {
  my($TheString) = @_;

  my $HasContent = defined($TheString) && length($TheString);

  return $HasContent ? 1 : 0;
}
430 | |
# Check out the string: Does it only contain numerical data?
#
# The following format is accepted: an optional + or - sign; digits with an
# optional decimal point, or a decimal point followed by digits; and an optional
# exponent made up of e or E, an optional sign and digits. Examples: 12, -0.25,
# .5, 1., 1e5, 1.5E-3. Leading and trailing white space characters around the
# number are ignored.
#
# Returns 1 for such a string; 0 otherwise, which includes undefined and empty
# strings.
#
sub IsNumerical {
  my($TheString) = @_;

  if (!(defined($TheString) && length($TheString))) {
    return 0;
  }

  # Match the whole number format instead of just looking for invalid
  # characters: 1.2.3, 1e and .. contain valid characters only, and 1e-5
  # contains a sign after its first character...
  return ($TheString =~ /\A[ \t\r\n\f]*[+-]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][+-]?[0-9]+)?[ \t\r\n\f]*\z/) ? 1 : 0;
}
443 | |
# Join words into a string using the specified delimiter, optionally enclosing
# each word in double quotes.
#
# Parameters:
#   $Words  - Reference to an array of words.
#   $Delim  - Delimiter placed between the words.
#   $Quote  - True value to enclose each word in double quotes.
#
# Returns:
#   The joined string; an empty string for an empty array. Undefined and empty
#   words show up as empty fields, which are still quoted when quoting is on.
#
sub JoinWords {
  my($Words, $Delim, $Quote) = @_;

  return "" unless @$Words;

  my $QuoteChar = $Quote ? "\"" : "";

  my @Pieces;
  for my $Word (@$Words) {
    my $Text = (defined($Word) && length($Word)) ? $Word : "";
    push @Pieces, $QuoteChar . $Text . $QuoteChar;
  }

  return join($Delim, @Pieces);
}
458 | |
# Split string value containing quoted or unquoted words in to an array
# containing unquoted words.
#
# This function is used to split strings generated by JoinWords.
#
# Parameters:
#   $Line   - String to split. It's treated as quoted when it starts with a
#             double quote.
#   $Delim  - Delimiter used between the words. NOTE: it's interpolated into
#             the split pattern as is, so any regular expression meta
#             characters in it keep their special meaning.
#
# Returns:
#   The list of words, including any empty words at the end of the line. An
#   empty list is returned for an undefined or empty line.
#
sub SplitWords {
  my($Line, $Delim) = @_;

  # Test the length instead of the truth value: a line containing just 0 holds
  # one word...
  if (!(defined($Line) && length($Line))) {
    return ();
  }

  # Is it a quoted string?
  if ($Line =~ /^\"/) {
    # Take out first and last quote...
    $Line =~ s/^\"//; $Line =~ s/\"$//;

    $Delim = "\"$Delim\"";
  }

  # The negative limit keeps trailing empty words, which split drops by default
  # changing the number of words...
  return split /$Delim/, $Line, -1;
}
480 | |
# Based on quote parameter, enclose the word in double quotes or leave it as is.
#
# Parameters:
#   $Word   - Word to process; an undefined word is treated as an empty one.
#   $Quote  - True value to enclose the word in double quotes.
#
# Returns:
#   The word, enclosed in double quotes when quoting is on.
#
sub QuoteAWord {
  my($Word, $Quote) = @_;

  # Test the length instead of the truth value: 0 is a valid word and must not
  # turn into an empty one...
  my $QuotedWord = (defined($Word) && length($Word)) ? $Word : "";

  if ($Quote) {
    $QuotedWord = "\"$QuotedWord\"";
  }
  return ($QuotedWord);
}
495 | |
# Remove leading white space characters from the string...
#
# White space means a space, tab, carriage return, new line or form feed. Any
# white space inside or at the end of the string, including embedded new
# lines, is left alone.
#
# Returns the trimmed copy of the string; undefined and empty strings are
# returned as they are.
#
sub RemoveLeadingWhiteSpaces {
  my($InString) = @_;

  if (!(defined($InString) && length($InString))) {
    return $InString;
  }

  # Anchor at the start of the string only, without trying to match the rest of
  # it, so that strings with embedded new lines are trimmed as well...
  my $OutString = $InString;
  $OutString =~ s/\A[ \t\r\n\f]+//;

  return $OutString;
}
507 | |
# Remove trailing white space characters from the string...
#
# White space means a space, tab, carriage return, new line or form feed. Any
# white space inside or at the start of the string, including embedded new
# lines, is left alone.
#
# Returns the trimmed copy of the string; undefined and empty strings are
# returned as they are.
#
sub RemoveTrailingWhiteSpaces {
  my($InString) = @_;

  if (!(defined($InString) && length($InString))) {
    return $InString;
  }

  # Anchor at the end of the string only, without trying to match the rest of
  # it, so that strings with embedded new lines are trimmed as well...
  my $OutString = $InString;
  $OutString =~ s/[ \t\r\n\f]+\z//;

  return $OutString;
}
519 | |
# Remove both leading and trailing white space characters from the string...
#
# White space means a space, tab, carriage return, new line or form feed. Any
# white space inside the string, including embedded new lines, is left alone.
#
# Returns the trimmed copy of the string; undefined and empty strings are
# returned as they are.
#
sub RemoveLeadingAndTrailingWhiteSpaces {
  my($InString) = @_;

  if (!(defined($InString) && length($InString))) {
    return $InString;
  }

  # Trim each end on its own, without trying to match the text in between, so
  # that strings with embedded new lines are trimmed as well...
  my $OutString = $InString;
  $OutString =~ s/\A[ \t\r\n\f]+//;
  $OutString =~ s/[ \t\r\n\f]+\z//;

  return $OutString;
}
531 | |
# Wrap text string...
#
# The string is cut into pieces of the wrap length, irrespective of any word
# boundaries, and the pieces are joined using the wrap delimiter.
#
# Parameters:
#   $InString       - String to wrap.
#   $WrapLength     - Optional maximum length of a piece. 40 is used when it's
#                     not specified or less than 1; any fractional part is
#                     dropped.
#   $WrapDelimiter  - Optional delimiter placed between the pieces. A new line
#                     is used when it's not specified.
#
# Returns:
#   The wrapped string. Strings not longer than the wrap length, including
#   undefined and empty strings, are returned as they are.
#
sub WrapText {
  my($InString, $WrapLength, $WrapDelimiter) = @_;

  # Fall back to the default for a missing or unusable wrap length: the loop
  # below would never move forward for values less than 1...
  $WrapLength = (defined($WrapLength) && $WrapLength >= 1) ? int($WrapLength) : 40;
  $WrapDelimiter = "\n" unless defined $WrapDelimiter;

  if (!(defined($InString) && length($InString) > $WrapLength)) {
    return $InString;
  }

  my $Length = length($InString);
  my @StringPieces;
  for (my $Index = 0; $Index < $Length; $Index += $WrapLength) {
    # substr stops at the end of the string for the last, shorter piece...
    push @StringPieces, substr($InString, $Index, $WrapLength);
  }

  return join($WrapDelimiter, @StringPieces);
}
573 | |
574 1; | |
575 | |
576 __END__ | |
577 | |
578 =head1 NAME | |
579 | |
580 TextUtil | |
581 | |
582 =head1 SYNOPSIS | |
583 | |
584 use TextUtil; | |
585 | |
586 use TextUtil qw(:all); | |
587 | |
588 =head1 DESCRIPTION | |
589 | |
590 B<TextUtil> module provides the following functions: | |
591 | |
592 AddNumberSuffix, ContainsWhiteSpaces, GetTextFileDataByNonUniqueKey, | |
593 GetTextFileDataByUniqueKey, GetTextLine, HashCode, IsEmpty, IsFloat, IsInteger, | |
594 IsNotEmpty, IsNumberPowerOfNumber, IsNumerical, IsPositiveInteger, JoinWords, | |
595 QuoteAWord, RemoveLeadingAndTrailingWhiteSpaces, RemoveLeadingWhiteSpaces, | |
596 RemoveTrailingWhiteSpaces, SplitWords, WrapText | |
597 | |
598 =head1 FUNCTIONS | |
599 | |
600 =over 4 | |
601 | |
602 =item B<AddNumberSuffix> | |
603 | |
604 $NumberWithSuffix = AddNumberSuffix($IntegerValue); | |
605 | |
606 Returns number with appropriate suffix: 0, 1st, 2nd, 3rd, 4th, and so on. | |
607 | |
608 =item B<ContainsWhiteSpaces> | |
609 | |
610 $Status = ContainsWhiteSpaces($TheString); | |
611 | |
612 Returns 1 or 0 based on whether the string contains any white spaces. | |
613 | |
614 =item B<GetTextLine> | |
615 | |
616 $Line = GetTextLine(\*TEXTFILE); | |
617 | |
618 Reads the next non-empty line from an already opened text file, takes out any end of | |
619 line characters, and returns it as a string. An empty string is returned for EOF. | |
620 | |
621 =item B<GetTextFileDataByNonUniqueKey> | |
622 | |
623 GetTextFileDataByNonUniqueKey($TextDataFile, $TextDataMapRef, | |
624 $DataKeyColNum, $InDelim); | |
625 | |
626 Load data from a text file into the specified hash reference using a specific | |
627 column for non-unique data key values. | |
628 | |
629 The lines starting with # are treated as comments and ignored. First line | |
630 not starting with # must contain column labels and the number of columns in | |
631 all other data rows must match the number of column labels. | |
632 | |
633 The first column is assumed to contain data key value by default; all other columns | |
634 contain data as indicated in their column labels. | |
635 | |
636 In order to avoid dependence of data access on the specified column labels, the | |
637 column data is loaded into hash with DataCol<Num> hash keys, where column numbers | |
638 start from 1. The data key column is not available as a DataCol<Num> hash key. | |
639 | |
640 The format of the data structure loaded into a specified hash reference is: | |
641 | |
642 @{$TextDataMapRef->{DataKeys}} - Array of unique data keys | |
643 @{$TextDataMapRef->{ColLabels}} - Array of column labels | |
644 @{$TextDataMapRef->{DataColIDs}} - Array of data column IDs | |
645 $TextDataMapRef->{NumOfCols} - Number of columns | |
646 %{$TextDataMapRef->{DataKey}} - Hash keys pair: <DataKey, DataKey> | |
647 @{$TextDataMapRef->{DataCol<Num>}} - Hash keys pair with data as an array: | |
648 <DataCol<Num>, DataKey> | |
649 | |
650 =item B<GetTextFileDataByUniqueKey> | |
651 | |
652 GetTextFileDataByUniqueKey($TextDataFile, $TextDataMapRef, $DataKeyColNum, | |
653 $InDelim); | |
654 | |
655 Load data from a text file into the specified hash reference using a specific | |
656 column for unique data key values. | |
657 | |
658 The lines starting with # are treated as comments and ignored. First line | |
659 not starting with # must contain column labels and the number of columns in | |
660 all other data rows must match the number of column labels. | |
661 | |
662 The first column is assumed to contain data key value by default; all other columns | |
663 contain data as indicated in their column labels. | |
664 | |
665 In order to avoid dependence of data access on the specified column labels, the | |
666 column data is loaded into hash with DataCol<Num> hash keys, where column numbers | |
667 start from 1. The data key column is not available as a DataCol<Num> hash key. | |
668 | |
669 The format of the data structure loaded into a specified hash reference is: | |
670 | |
671 @{$TextDataMapRef->{DataKeys}} - Array of unique data keys | |
672 @{$TextDataMapRef->{ColLabels}} - Array of column labels | |
673 @{$TextDataMapRef->{DataColIDs}} - Array of data column IDs | |
674 $TextDataMapRef->{NumOfCols} - Number of columns | |
675 %{$TextDataMapRef->{DataKey}} - Hash keys pair: <DataKey, DataKey> | |
676 %{$TextDataMapRef->{DataCol<Num>}} - Hash keys pair: <DataCol<Num>, DataKey> | |
677 | |
678 =item B<HashCode> | |
679 | |
680 $HashCode = HashCode($TheString); | |
681 | |
682 Returns a 32 bit integer hash code using One-at-a-time algorithm By Bob Jenkins [Ref 38]. | |
683 It's also implemented in Perl for internal hash keys in hv.h include file. | |
684 | |
685 =item B<IsEmpty> | |
686 | |
687 $Status = IsEmpty($TheString); | |
688 | |
689 Returns 1 or 0 based on whether the string is empty. | |
690 | |
691 =item B<IsInteger> | |
692 | |
693 $Status = IsInteger($TheString); | |
694 | |
695 Returns 1 or 0 based on whether the string is an integer. | |
696 | |
697 =item B<IsPositiveInteger> | |
698 | |
699 $Status = IsPositiveInteger($TheString); | |
700 | |
701 Returns 1 or 0 based on whether the string is an integer with a value greater than 0. | |
702 | |
703 =item B<IsFloat> | |
704 | |
705 $Status = IsFloat($TheString); | |
706 | |
707 Returns 1 or 0 based on whether the string is a float. | |
708 | |
709 =item B<IsNotEmpty> | |
710 | |
711 $Status = IsNotEmpty($TheString); | |
712 | |
713 Returns 0 or 1 based on whether the string is empty. | |
714 | |
715 =item B<IsNumerical> | |
716 | |
717 $Status = IsNumerical($TheString); | |
718 | |
719 Returns 1 or 0 based on whether the string is a number. | |
720 | |
721 =item B<IsNumberPowerOfNumber> | |
722 | |
723 $Status = IsNumberPowerOfNumber($FirstNum, $SecondNum); | |
724 | |
725 Returns 1 or 0 based on whether the first number is a power of second number. | |
726 | |
727 =item B<JoinWords> | |
728 | |
729 $JoinedWords = JoinWords($Words, $Delim, $Quote); | |
730 | |
731 Joins different words using delimiter and quote parameters, and returns it | |
732 as a string. | |
733 | |
734 =item B<QuoteAWord> | |
735 | |
736 $QuotedWord = QuoteAWord($Word, $Quote); | |
737 | |
738 Returns a quoted string based on I<Quote> value. | |
739 | |
740 =item B<RemoveLeadingWhiteSpaces> | |
741 | |
742 $OutString = RemoveLeadingWhiteSpaces($InString); | |
743 | |
744 Returns a string without any leading white spaces. | |
745 | |
746 =item B<RemoveTrailingWhiteSpaces> | |
747 | |
748 $OutString = RemoveTrailingWhiteSpaces($InString); | |
749 | |
750 Returns a string without any trailing white spaces. | |
751 | |
752 =item B<RemoveLeadingAndTrailingWhiteSpaces> | |
753 | |
754 $OutString = RemoveLeadingAndTrailingWhiteSpaces($InString); | |
755 | |
756 Returns a string without any leading and trailing white spaces. | |
757 | |
758 =item B<SplitWords> | |
759 | |
760 @Words = SplitWords($Line, $Delimiter); | |
761 | |
762 Returns an array I<Words> containing unquoted words generated after splitting | |
763 string value I<Line> containing quoted or unquoted words. | |
764 | |
765 This function is used to split strings generated by JoinWords as replacement | |
766 for Perl's core module function Text::ParseWords::quotewords() which dumps core | |
767 on very long strings. | |
768 | |
769 =item B<WrapText> | |
770 | |
771 $OutString = WrapText($InString, [$WrapLength, $WrapDelimiter]); | |
772 | |
773 Returns a wrapped string. By default, I<WrapLength> is I<40> and I<WrapDelimiter> | |
774 is Unix new line character. | |
775 | |
776 =back | |
777 | |
778 =head1 AUTHOR | |
779 | |
780 Manish Sud <msud@san.rr.com> | |
781 | |
782 =head1 SEE ALSO | |
783 | |
784 FileUtil.pm | |
785 | |
786 =head1 COPYRIGHT | |
787 | |
788 Copyright (C) 2015 Manish Sud. All rights reserved. | |
789 | |
790 This file is part of MayaChemTools. | |
791 | |
792 MayaChemTools is free software; you can redistribute it and/or modify it under | |
793 the terms of the GNU Lesser General Public License as published by the Free | |
794 Software Foundation; either version 3 of the License, or (at your option) | |
795 any later version. | |
796 | |
797 =cut |