comparison mayachemtools/lib/TextUtil.pm @ 0:73ae111cf86f draft

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 11:55:01 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:73ae111cf86f
1 package TextUtil;
2 #
3 # $RCSfile: TextUtil.pm,v $
4 # $Date: 2015/03/22 20:08:26 $
5 # $Revision: 1.45 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use Exporter;
31
# Package globals consulted by Exporter. Every public function is exported by
# default and is also reachable through the ":all" tag.
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

@ISA = ('Exporter');
@EXPORT = (
  'AddNumberSuffix', 'ContainsWhiteSpaces', 'GetTextLine',
  'GetTextFileDataByUniqueKey', 'GetTextFileDataByNonUniqueKey', 'HashCode',
  'IsEmpty', 'IsNumberPowerOfNumber', 'IsInteger', 'IsPositiveInteger',
  'IsFloat', 'IsNotEmpty', 'IsNumerical', 'JoinWords', 'SplitWords',
  'QuoteAWord', 'RemoveLeadingWhiteSpaces', 'RemoveTrailingWhiteSpaces',
  'RemoveLeadingAndTrailingWhiteSpaces', 'WrapText',
);
@EXPORT_OK = ();
%EXPORT_TAGS = ('all' => [@EXPORT, @EXPORT_OK]);
38
# Add an English ordinal suffix (st, nd, rd or th) to a positive integer...
#
# Values which are not positive integers, as determined by IsPositiveInteger,
# are returned unchanged.
#
# The suffix is selected using the last two digits of the value: anything ending
# in 11, 12 or 13 (11, 112, 1013 and so on) gets "th"; otherwise the last digit
# decides between "st", "nd", "rd" and "th".
sub AddNumberSuffix {
  my($Value) = @_;
  my($LastTwoDigits, $LastDigit, $Suffix);

  if (!IsPositiveInteger($Value)) {
    return $Value;
  }

  $LastTwoDigits = $Value % 100;
  $LastDigit = $Value % 10;

  $Suffix = "th";
  if ($LastTwoDigits < 11 || $LastTwoDigits > 13) {
    $Suffix = ($LastDigit == 1) ? "st" : (($LastDigit == 2) ? "nd" : (($LastDigit == 3) ? "rd" : "th"));
  }
  return "${Value}${Suffix}";
}
56
# Check out the string: Does it contain any white space characters?
#
# Returns 1 when the string holds at least one space, tab, carriage return, new
# line or form feed character; returns 0 otherwise, including for undefined and
# empty strings.
sub ContainsWhiteSpaces {
  my($TheString) = @_;

  return 0 unless defined($TheString) && length($TheString);

  if ($TheString =~ /[ \t\r\n\f]/) {
    return 1;
  }
  return 0;
}
67
# Read the next non-empty line from an open file handle, change Windows and Mac
# new line chars to UNIX, and take out the new line char at the end...
#
# Parameters:
#   $TextFileRef - File handle or reference to a glob for an already opened file.
#
# Returns the line as a string. Lines which are empty after removing the new
# line char are skipped; an empty string is returned once EOF is reached.
#
# The line is read into a lexical variable instead of the global $_ so that the
# caller's $_ - for example, the loop variable of an enclosing for or map - is
# left untouched.
sub GetTextLine {
  my($TextFileRef) = @_;
  my($Line);

  # Get the next non empty line...
  LINE: while (defined($Line = <$TextFileRef>)) {
    # Change Windows and Mac new line char to UNIX...
    $Line =~ s/(\r\n)|(\r)/\n/g;

    # Take out any new line char at the end by explicitly removing it instead of using
    # chomp, which might not always work correctly on files generated on a system
    # with a value of input line separator different from the current system...
    $Line =~ s/\n$//g;

    # Doesn't hurt to chomp...
    chomp $Line;

    if (length $Line) {
      return $Line;
    }
  }

  # EOF: Nothing but empty lines were left in the file...
  return '';
}
93
# Load data from a delimited text file into the specified hash reference using a
# specific column for unique data key values.
#
# This is a thin wrapper: all the work is done by _GetTextFileData, which is
# called in "UniqueKey" mode with the remaining parameters passed through as is.
#
# The lines starting with # are treated as comments and ignored. First line
# not starting with # must contain column labels and the number of columns in
# all other data rows must match the number of column labels.
#
# The first column is assumed to contain data key value by default; all other columns
# contain data as indicated in their column labels.
#
# In order to avoid dependence of data access on the specified column labels, the
# column data is loaded into hash with DataCol<Num> hash keys, where column number
# start from 1. The data key column is not available as DataCol<Num> hash key.
#
# The format of the data structure loaded into a specified hash reference is:
#
# @{$TextDataMapRef->{DataKeys}} - Array of unique data keys
# @{$TextDataMapRef->{ColLabels}} - Array of column labels
# @{$TextDataMapRef->{DataColIDs}} - Array of data column IDs
# $TextDataMapRef->{NumOfCols} - Number of columns
# %{$TextDataMapRef->{DataKey}} - Hash keys pair: <DataKey, DataKey>
# %{$TextDataMapRef->{DataCol<Num>}} - Hash keys pair: <DataCol<Num>, DataKey>
#
# Caveats:
# . The column number start from 1.
# . Column data for the data key column is not loaded into <DataCol<Num>, DataKey> hash keys pairs.
#
sub GetTextFileDataByUniqueKey {
  my($TextDataFile, $TextDataMapRef, $DataKeyColNum, $InDelim) = @_;
  my(@LoaderArgs);

  @LoaderArgs = ("UniqueKey", $TextDataFile, $TextDataMapRef, $DataKeyColNum, $InDelim);

  return _GetTextFileData(@LoaderArgs);
}
126
# Load data from a delimited text file into the specified hash reference using a
# specific column for non-unique data key values.
#
# This is a thin wrapper: all the work is done by _GetTextFileData, which is
# called in "NonUniqueKey" mode with the remaining parameters passed through as is.
#
# The lines starting with # are treated as comments and ignored. First line
# not starting with # must contain column labels and the number of columns in
# all other data rows must match the number of column labels.
#
# The first column is assumed to contain data key value by default; all other columns
# contain data as indicated in their column labels.
#
# In order to avoid dependence of data access on the specified column labels, the
# column data is loaded into hash with DataCol<Num> hash keys, where column number
# start from 1. The data key column is not available as DataCol<Num> hash key.
#
# The format of the data structure loaded into a specified hash reference is:
#
# @{$TextDataMapRef->{DataKeys}} - Array of unique data keys
# @{$TextDataMapRef->{ColLabels}} - Array of column labels
# @{$TextDataMapRef->{DataColIDs}} - Array of data column IDs
# $TextDataMapRef->{NumOfCols} - Number of columns
# %{$TextDataMapRef->{DataKey}} - Hash keys pair: <DataKey, DataKey>
# @{$TextDataMapRef->{DataCol<Num>}{<DataKey>}} - Array of column values collected for the data key
#
# Caveats:
# . The column number start from 1.
# . Column data for the data key column is not loaded into <DataCol<Num>, DataKey> hash keys pairs.
#
sub GetTextFileDataByNonUniqueKey {
  my($TextDataFile, $TextDataMapRef, $DataKeyColNum, $InDelim) = @_;
  my(@LoaderArgs);

  @LoaderArgs = ("NonUniqueKey", $TextDataFile, $TextDataMapRef, $DataKeyColNum, $InDelim);

  return _GetTextFileData(@LoaderArgs);
}
159
# Load text file data using unique or non-unique data column key...
#
# Parameters:
#   $DataKeyMode    - "UniqueKey" (case insensitive) keeps a single value per data
#                     key and column, ignoring lines with an already seen key; any
#                     other value collects all values for a key into an array.
#   $TextDataFile   - Name of the text file to read.
#   $TextDataMapRef - Hash reference which is emptied and filled with DataKeys,
#                     ColLabels, DataColIDs, NumOfCols, DataKey and DataCol<Num> keys.
#   $DataKeyColNum  - Column number, starting from 1, containing data keys. Default: 1.
#   $InDelim        - "semicolon" (case insensitive) selects ; as the delimiter; any
#                     other value, including undef, selects comma. It's ignored for
#                     file names ending in .tsv, which always use tab.
#
# Returns nothing, after a warning, for an invalid data key column number;
# otherwise, the status of closing the file is returned.
#
# Dies when the file can't be opened.
#
sub _GetTextFileData {
  my($DataKeyMode, $TextDataFile, $TextDataMapRef, $DataKeyColNum, $InDelim) = @_;
  my($DataKeyColIndex, $LineCount, $IgnoredLineCount, $UniqueDataKeyMode, $DataKey, $Line, $NumOfCols, $NumOfLineWords, $ColNum, $ColID, $ColValue, $TextDataFH, $CloseStatus, @LineWords, @ColLabels, @DataColIDs, @DataColNums);

  # quotewords is provided by Text::ParseWords, which is not loaded at the top of
  # this file; load it here and use its fully qualified name...
  require Text::ParseWords;

  print "\nProcessing text data file $TextDataFile...\n";

  $UniqueDataKeyMode = ($DataKeyMode =~ /^UniqueKey$/i) ? 1 : 0;

  # Setup default values...
  $DataKeyColNum = defined $DataKeyColNum ? $DataKeyColNum : 1;

  # The file extension wins over the specified delimiter. The specified delimiter
  # name must be checked before any default value is assigned to it...
  if ($TextDataFile =~ /\.tsv$/i) {
    $InDelim = "\t";
  }
  elsif (defined($InDelim) && $InDelim =~ /^semicolon$/i) {
    $InDelim = "\;";
  }
  else {
    $InDelim = "\,";
  }

  ($LineCount, $IgnoredLineCount) = (0) x 2;

  # Three argument open with a lexical file handle: The file name is never
  # interpreted as a mode or a pipe and the handle is released on every return path...
  open($TextDataFH, "<", $TextDataFile) or die "Couldn't open $TextDataFile: $! ...";

  # Skip lines up to column labels. GetTextLine returns an empty string at EOF;
  # checking the length keeps a line containing just 0 from ending the loop...
  LINE: while (length($Line = TextUtil::GetTextLine($TextDataFH))) {
    $LineCount++;
    if ($Line =~ /^#/) {
      $IgnoredLineCount++;
    }
    else {
      last LINE;
    }
  }

  # Initialize data map...
  %{$TextDataMapRef} = ();
  @{$TextDataMapRef->{DataKeys}} = ();
  @{$TextDataMapRef->{ColLabels}} = ();
  @{$TextDataMapRef->{DataColIDs}} = ();
  $TextDataMapRef->{NumOfCols} = undef;

  # Process column labels...
  @ColLabels = Text::ParseWords::quotewords($InDelim, 0, $Line);
  $NumOfCols = @ColLabels;

  if ($DataKeyColNum < 1 || $DataKeyColNum > $NumOfCols) {
    warn "Warning: Ignoring text data file $TextDataFile: Invalid data key column number, $DataKeyColNum, specified. It must be > 0 and <= $NumOfCols, number of columns in the text file ...";
    close $TextDataFH;
    return;
  }
  $DataKeyColIndex = $DataKeyColNum - 1;

  $TextDataMapRef->{NumOfCols} = $NumOfCols;
  push @{$TextDataMapRef->{ColLabels}}, @ColLabels;

  # Set up column data IDs for tracking the data; the data key column is skipped...
  @DataColNums = ();
  @DataColIDs = ();
  COLNUM: for $ColNum (1 .. $NumOfCols) {
    if ($ColNum == $DataKeyColNum) {
      next COLNUM;
    }
    push @DataColNums, $ColNum;
    push @DataColIDs, "DataCol${ColNum}";
  }
  push @{$TextDataMapRef->{DataColIDs}}, @DataColIDs;

  # Initialize column data hash...
  %{$TextDataMapRef->{DataKey}} = ();
  for $ColID (@DataColIDs) {
    %{$TextDataMapRef->{$ColID}} = ();
  }

  LINE: while (length($Line = TextUtil::GetTextLine($TextDataFH))) {
    $LineCount++;
    if ($Line =~ /^#/) {
      $IgnoredLineCount++;
      next LINE;
    }

    @LineWords = Text::ParseWords::quotewords($InDelim, 0, $Line);
    $NumOfLineWords = @LineWords;
    if ($NumOfLineWords != $NumOfCols) {
      $IgnoredLineCount++;
      warn "Warning: The number of data fields, $NumOfLineWords, in $TextDataFile must be $NumOfCols.\nIgnoring line number $LineCount: $Line...\n";
      next LINE;
    }
    $DataKey = $LineWords[$DataKeyColIndex];

    if ($UniqueDataKeyMode) {
      if (exists $TextDataMapRef->{DataKey}{$DataKey}) {
        $IgnoredLineCount++;
        warn "Warning: The data key, $DataKey, in data column key number, $DataKeyColNum, is already present.\nIgnoring line number $LineCount: $Line...\n";
        next LINE;
      }
      push @{$TextDataMapRef->{DataKeys}}, $DataKey;
      $TextDataMapRef->{DataKey}{$DataKey} = $DataKey;
    }
    else {
      if (!exists $TextDataMapRef->{DataKey}{$DataKey}) {
        push @{$TextDataMapRef->{DataKeys}}, $DataKey;
        $TextDataMapRef->{DataKey}{$DataKey} = $DataKey;

        # First time this key is seen: Start with empty value arrays...
        for $ColID (@DataColIDs) {
          @{$TextDataMapRef->{$ColID}{$DataKey}} = ();
        }
      }
    }

    # Track column data values...
    for my $ColIndex (0 .. $#DataColNums) {
      $ColID = $DataColIDs[$ColIndex];

      $ColNum = $DataColNums[$ColIndex];
      $ColValue = $LineWords[$ColNum - 1];

      if ($UniqueDataKeyMode) {
        $TextDataMapRef->{$ColID}{$DataKey} = $ColValue;
      }
      else {
        push @{$TextDataMapRef->{$ColID}{$DataKey}}, $ColValue;
      }
    }

  }

  print "\nTotal number of lines in file $TextDataFile: $LineCount\n";
  print "Total number of lines ignored: $IgnoredLineCount\n";

  # The close status has always been the value returned on success...
  $CloseStatus = close $TextDataFH;

  return $CloseStatus;
}
301
# Returns an integer hash code using One-at-a-time algorithm By Bob Jenkins [Ref 38]. It's also implemented in
# Perl for internal hash keys in hv.h include file.
#
# It's not clear how to force Perl perform unsigned integer arithmetic irrespective of the OS/Platform and
# the value of use64bitint flag used during its compilation.
#
# In order to generate a consistent hash code across OS/platforms, the following methodology is used:
#
# o Apply MaxHashCodeMask after each left shift and after each addition of the shifted value
# o Stay away from "use integer" to avoid signed integer arithmetic for bit operators
#
# MaxHashCodeMask (2147483647) corresponds to the maximum value which can be stored in 31 bits; the
# returned hash code never exceeds it.
#
# Masking is applied unconditionally: For a value which already fits into the mask, it changes nothing.
#
my $MaxHashCodeMask = 2**31 - 1;

sub HashCode {
  my($String) = @_;
  my($Code);

  $Code = 0;
  foreach my $Byte (unpack('C*', $String)) {
    # Mix in the next character code...
    $Code += $Byte;
    $Code = ($Code + (($Code << 10) & $MaxHashCodeMask)) & $MaxHashCodeMask;
    $Code ^= ($Code >> 6);
  }

  # Final avalanche steps...
  $Code = ($Code + (($Code << 3) & $MaxHashCodeMask)) & $MaxHashCodeMask;
  $Code ^= ($Code >> 11);
  $Code = ($Code + (($Code << 15) & $MaxHashCodeMask)) & $MaxHashCodeMask;

  return $Code;
}
363
# Check out the string: Is it undefined or does it have a zero length?
#
# Returns 1 for an undefined or zero length string and 0 for everything else,
# including the string "0".
sub IsEmpty {
  my($TheString) = @_;

  if (defined($TheString) && length($TheString)) {
    return 0;
  }
  return 1;
}
373
# Is first specified number power of second specified number...
#
# Returns 1 when log(FirstNum)/log(SecondNum) is reported as an integer by
# IsInteger; otherwise, 0 is returned.
#
# Logarithm is only defined for values greater than 0 and log(1) is 0. Instead of
# dying in log or on division by zero, the following values are handled up front:
#
# . Undefined, zero or negative numbers: 0 is returned.
# . SecondNum equal to 1: Only 1 is a power of 1.
#
sub IsNumberPowerOfNumber {
  my($FirstNum, $SecondNum) = @_;
  my($PowerValue);

  if (!(defined($FirstNum) && defined($SecondNum))) {
    return 0;
  }
  if ($FirstNum <= 0 || $SecondNum <= 0) {
    return 0;
  }
  if ($SecondNum == 1) {
    return ($FirstNum == 1) ? 1 : 0;
  }

  $PowerValue = log($FirstNum)/log($SecondNum);

  return IsInteger($PowerValue) ? 1 : 0;
}
383
# Check out the string: Is it an integer?
#
# Returns 1 when the string, ignoring any leading and trailing white space
# characters, consists of an optional + or - sign followed by one or more
# digits; otherwise, 0 is returned.
#
# At least one digit is required: A sign by itself or a string containing only
# white space characters is not an integer.
sub IsInteger {
  my($TheString) = @_;
  my($Status) = 0;

  if (defined($TheString) && length($TheString)) {
    $Status = ($TheString =~ /\A[ \t\r\n\f]*[+-]?[0-9]+[ \t\r\n\f]*\z/) ? 1 : 0;
  }
  return $Status;
}
396
# Check out the string: Is it an integer with value > 0?
#
# Returns 1 when IsInteger accepts the string and its numerical value is
# greater than 0; otherwise, 0 is returned.
sub IsPositiveInteger {
  my($TheString) = @_;

  if (!IsInteger($TheString)) {
    return 0;
  }
  if ($TheString > 0) {
    return 1;
  }
  return 0;
}
406
407
# Check out the string: Is it a float?
#
# Returns 1 when the string, ignoring any leading and trailing white space
# characters, has the following format; otherwise, 0 is returned:
#
# . Optional + or - sign
# . Digits with an optional decimal point (12, 12., 12.5), or a decimal point
#   followed by digits (.5)
# . Optional exponent: e or E, optional sign, and one or more digits (1e5, 1.5E-3)
#
# Integers are accepted as well. The structure of the value is validated as
# opposed to just its characters; consequently, values such as 1.2.3, ee or 1e
# are not floats, and a sign is allowed in the exponent.
sub IsFloat {
  my($TheString) = @_;
  my($Status) = 0;

  if (defined($TheString) && length($TheString)) {
    $Status = ($TheString =~ /\A[ \t\r\n\f]*[+-]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][+-]?[0-9]+)?[ \t\r\n\f]*\z/) ? 1 : 0;
  }
  return $Status;
}
420
# Check out the string: Is it defined and has a non zero length?
#
# Returns the opposite of IsEmpty: 0 when IsEmpty reports the string as empty
# and 1 otherwise.
sub IsNotEmpty {
  my($TheString) = @_;

  if (IsEmpty($TheString)) {
    return 0;
  }
  return 1;
}
430
# Check out the string: Does it only contain numerical data?
#
# Returns 1 when the string, ignoring any leading and trailing white space
# characters, is an integer or a float in the following format; otherwise, 0
# is returned:
#
# . Optional + or - sign
# . Digits with an optional decimal point (12, 12., 12.5), or a decimal point
#   followed by digits (.5)
# . Optional exponent: e or E, optional sign, and one or more digits (1e5, 1.5E-3)
#
# The structure of the value is validated as opposed to just its characters;
# consequently, values such as 1.2.3, ee or 1e are not numerical, and a sign is
# allowed in the exponent.
sub IsNumerical {
  my($TheString) = @_;
  my($Status) = 0;

  if (defined($TheString) && length($TheString)) {
    $Status = ($TheString =~ /\A[ \t\r\n\f]*[+-]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][+-]?[0-9]+)?[ \t\r\n\f]*\z/) ? 1 : 0;
  }
  return $Status;
}
443
# Join words into a single string using the specified delimiter...
#
# Parameters:
#   $Words - Reference to an array of words.
#   $Delim - Delimiter placed between the words.
#   $Quote - True value: Each word is enclosed in double quotes.
#
# Returns the joined string; an empty string is returned for an empty array.
# Undefined and empty words are joined as empty words, which still get a pair
# of quotes when quoting is turned on.
sub JoinWords {
  my($Words, $Delim, $Quote) = @_;
  my($QuoteChar, $Text, @QuotedWords);

  return "" unless @$Words;

  $QuoteChar = $Quote ? "\"" : "";

  @QuotedWords = ();
  for my $Word (@$Words) {
    $Text = (defined($Word) && length($Word)) ? $Word : "";
    push @QuotedWords, $QuoteChar . $Text . $QuoteChar;
  }

  return join($Delim, @QuotedWords);
}
458
# Split string value containing quoted or unquoted words in to an array containing
# unquoted words.
#
# This function is used to split strings generated by JoinWords.
#
# Parameters:
#   $Line  - String to split. A string starting with a double quote is treated as
#            a line of quoted words.
#   $Delim - Delimiter used during generation of the string. It's treated as literal
#            text and not as a regular expression: Delimiters such as | or . only
#            match themselves.
#
# Returns a list of words; an empty list is returned for an undefined or empty
# string. A string containing just 0 is a valid single word.
#
# Caveat: Like Perl's split, trailing empty words are not returned.
#
sub SplitWords {
  my($Line, $Delim) = @_;
  my($DelimPattern);

  if (!(defined($Line) && length($Line))) {
    return ();
  }
  $Delim = "" if !defined($Delim);

  # Is it a quoted string?
  if ($Line =~ /^\"/) {
    # Take out first and last quote...
    $Line =~ s/^\"//; $Line =~ s/\"$//;

    $Delim = "\"$Delim\"";
  }

  # Escape any regular expression metacharacters present in the delimiter...
  $DelimPattern = quotemeta($Delim);

  return split /$DelimPattern/, $Line;
}
480
# Based on quote parameter, figure out what to do...
#
# Parameters:
#   $Word  - Word to process. An undefined word is treated as an empty word.
#   $Quote - True value: The word is enclosed in double quotes.
#
# Returns the word, with or without quotes. The word is checked for being defined
# and non-empty instead of being true; otherwise, a value of 0 would be turned
# into an empty word.
sub QuoteAWord {
  my($Word, $Quote) = @_;
  my($QuotedWord);

  $QuotedWord = (defined($Word) && length($Word)) ? $Word : "";
  if ($Quote) {
    $QuotedWord = "\"$QuotedWord\"";
  }
  return ($QuotedWord);
}
495
# Remove leading white space characters from the string...
#
# Returns a copy of the string without any space, tab, carriage return, new line
# and form feed characters at its start. Undefined and empty strings are
# returned as is.
#
# The white space is removed by anchoring only at the start of the string: It
# works for strings containing new line characters in the middle as well.
sub RemoveLeadingWhiteSpaces {
  my($InString) = @_;
  my($OutString);

  $OutString = $InString;
  if (defined($OutString) && length($OutString)) {
    $OutString =~ s/\A[ \t\r\n\f]+//;
  }
  return $OutString;
}
507
# Remove trailing white space characters from the string...
#
# Returns a copy of the string without any space, tab, carriage return, new line
# and form feed characters at its end. Undefined and empty strings are returned
# as is.
#
# The white space is removed by anchoring only at the end of the string: It
# works for strings containing new line characters in the middle as well.
sub RemoveTrailingWhiteSpaces {
  my($InString) = @_;
  my($OutString);

  $OutString = $InString;
  if (defined($OutString) && length($OutString)) {
    $OutString =~ s/[ \t\r\n\f]+\z//;
  }
  return $OutString;
}
519
# Remove both leading and trailing white space characters from the string...
#
# Returns a copy of the string without any space, tab, carriage return, new line
# and form feed characters at its start and end. White space in the middle of
# the string is kept. Undefined and empty strings are returned as is.
#
# Each end is handled by its own anchored substitution: It works for strings
# containing new line characters in the middle as well.
sub RemoveLeadingAndTrailingWhiteSpaces {
  my($InString) = @_;
  my($OutString);

  $OutString = $InString;
  if (defined($OutString) && length($OutString)) {
    $OutString =~ s/\A[ \t\r\n\f]+//;
    $OutString =~ s/[ \t\r\n\f]+\z//;
  }
  return $OutString;
}
531
# Wrap text string...
#
# Parameters:
#   $InString      - String to wrap.
#   $WrapLength    - Maximum number of characters in each piece. Default: 40.
#   $WrapDelimiter - String placed between the pieces. Default: New line. It's
#                    only used when exactly three parameters are specified.
#
# Returns the string cut into pieces of WrapLength characters joined by
# WrapDelimiter. The string is returned as is when it's undefined, not longer
# than WrapLength, or WrapLength is not a number greater than or equal to 1.
#
# The default wrap length is applied whenever no wrap length is specified;
# together with the check for a positive wrap length, it makes sure that the
# loop over the string always moves forward.
sub WrapText {
  my($InString, $WrapLength, $WrapDelimiter) = @_;
  my($Index, $Length, @Pieces);

  if (@_ != 3) {
    $WrapDelimiter = "\n";
  }
  if (!defined($WrapLength)) {
    $WrapLength = 40;
  }

  # Pieces are cut using whole number of characters...
  $WrapLength = ($WrapLength =~ /^[ \t]*[+]?[0-9]/) ? int($WrapLength) : 0;

  if ($WrapLength < 1) {
    return $InString;
  }
  if (!(defined($InString) && (length($InString) > $WrapLength))) {
    return $InString;
  }

  @Pieces = ();
  $Length = length($InString);
  for ($Index = 0; $Index < $Length; $Index += $WrapLength) {
    # substr returns a shorter last piece without any help...
    push @Pieces, substr($InString, $Index, $WrapLength);
  }

  return join($WrapDelimiter, @Pieces);
}
573
574 1;
575
576 __END__
577
578 =head1 NAME
579
580 TextUtil
581
582 =head1 SYNOPSIS
583
584 use TextUtil;
585
586 use TextUtil qw(:all);
587
588 =head1 DESCRIPTION
589
590 B<TextUtil> module provides the following functions:
591
592 AddNumberSuffix, ContainsWhiteSpaces, GetTextFileDataByNonUniqueKey,
593 GetTextFileDataByUniqueKey, GetTextLine, HashCode, IsEmpty, IsFloat, IsInteger,
594 IsNotEmpty, IsNumberPowerOfNumber, IsNumerical, IsPositiveInteger, JoinWords,
595 QuoteAWord, RemoveLeadingAndTrailingWhiteSpaces, RemoveLeadingWhiteSpaces,
596 RemoveTrailingWhiteSpaces, SplitWords, WrapText
597
598 =head1 FUNCTIONS
599
600 =over 4
601
602 =item B<AddNumberSuffix>
603
604 $NumberWithSuffix = AddNumberSuffix($IntegerValue);
605
606 Returns number with appropriate suffix: 0, 1st, 2nd, 3rd, 4th, and so on.
607
608 =item B<ContainsWhiteSpaces>
609
610 $Status = ContainsWhiteSpaces($TheString);
611
612 Returns 1 or 0 based on whether the string contains any white spaces.
613
614 =item B<GetTextLine>
615
616 $Line = GetTextLine(\*TEXTFILE);
617
618 Reads the next non-empty line from an already opened text file, changes Windows and
619 Mac new line characters to Unix, removes the new line character at the end, and returns the line as a string. An empty string is returned at EOF.
620
621 =item B<GetTextFileDataByNonUniqueKey>
622
623 GetTextFileDataByNonUniqueKey($TextDataFile, $TextDataMapRef,
624 $DataKeyColNum, $InDelim);
625
626 Load data from a text file into the specified hash reference using a specific
627 column for non-unique data key values.
628
629 The lines starting with # are treated as comments and ignored. First line
630 not starting with # must contain column labels and the number of columns in
631 all other data rows must match the number of column labels.
632
633 The first column is assumed to contain data key value by default; all other columns
634 contain data as indicated in their column labels.
635
636 In order to avoid dependence of data access on the specified column labels, the
637 column data is loaded into hash with DataCol<Num> hash keys, where column numbers
638 start from 1. The data key column is not available as a DataCol<Num> hash key.
639
640 The format of the data structure loaded into a specified hash reference is:
641
642 @{$TextDataMapRef->{DataKeys}} - Array of unique data keys
643 @{$TextDataMapRef->{ColLabels}} - Array of column labels
644 @{$TextDataMapRef->{DataColIDs}} - Array of data column IDs
645 $TextDataMapRef->{NumOfCols} - Number of columns
646 %{$TextDataMapRef->{DataKey}} - Hash keys pair: <DataKey, DataKey>
647 @{$TextDataMapRef->{DataCol<Num>}} - Hash keys pair with data as an array:
648 <DataCol<Num>, DataKey>
649
650 =item B<GetTextFileDataByUniqueKey>
651
652 GetTextFileDataByUniqueKey($TextDataFile, $TextDataMapRef, $DataKeyColNum,
653 $InDelim);
654
655 Load data from a text file into the specified hash reference using a specific
656 column for unique data key values.
657
658 The lines starting with # are treated as comments and ignored. First line
659 not starting with # must contain column labels and the number of columns in
660 all other data rows must match the number of column labels.
661
662 The first column is assumed to contain data key value by default; all other columns
663 contain data as indicated in their column labels.
664
665 In order to avoid dependence of data access on the specified column labels, the
666 column data is loaded into hash with DataCol<Num> hash keys, where column numbers
667 start from 1. The data key column is not available as a DataCol<Num> hash key.
668
669 The format of the data structure loaded into a specified hash reference is:
670
671 @{$TextDataMapRef->{DataKeys}} - Array of unique data keys
672 @{$TextDataMapRef->{ColLabels}} - Array of column labels
673 @{$TextDataMapRef->{DataColIDs}} - Array of data column IDs
674 $TextDataMapRef->{NumOfCols} - Number of columns
675 %{$TextDataMapRef->{DataKey}} - Hash keys pair: <DataKey, DataKey>
676 %{$TextDataMapRef->{DataCol<Num>}} - Hash keys pair: <DataCol<Num>, DataKey>
677
678 =item B<HashCode>
679
680 $HashCode = HashCode($TheString);
681
682 Returns an integer hash code, masked to 31 bits (maximum value 2147483647), using One-at-a-time algorithm By Bob Jenkins [Ref 38].
683 It's also implemented in Perl for internal hash keys in hv.h include file.
684
685 =item B<IsEmpty>
686
687 $Status = IsEmpty($TheString);
688
689 Returns 1 or 0 based on whether the string is empty.
690
691 =item B<IsInteger>
692
693 $Status = IsInteger($TheString);
694
695 Returns 1 or 0 based on whether the string is an integer.
696
697 =item B<IsPositiveInteger>
698
699 $Status = IsPositiveInteger($TheString);
700
701 Returns 1 or 0 based on whether the string is an integer with a value greater than 0.
702
703 =item B<IsFloat>
704
705 $Status = IsFloat($TheString);
706
707 Returns 1 or 0 based on whether the string is a float.
708
709 =item B<IsNotEmpty>
710
711 $Status = IsNotEmpty($TheString);
712
713 Returns 0 or 1 based on whether the string is empty.
714
715 =item B<IsNumerical>
716
717 $Status = IsNumerical($TheString);
718
719 Returns 1 or 0 based on whether the string is a number.
720
721 =item B<IsNumberPowerOfNumber>
722
723 $Status = IsNumberPowerOfNumber($FirstNum, $SecondNum);
724
725 Returns 1 or 0 based on whether the first number is a power of second number.
726
727 =item B<JoinWords>
728
729 $JoinedWords = JoinWords($Words, $Delim, $Quote);
730
731 Joins different words using delimiter and quote parameters, and returns it
732 as a string.
733
734 =item B<QuoteAWord>
735
736 $QuotedWord = QuoteAWord($Word, $Quote);
737
738 Returns a quoted string based on I<Quote> value.
739
740 =item B<RemoveLeadingWhiteSpaces>
741
742 $OutString = RemoveLeadingWhiteSpaces($InString);
743
744 Returns a string without any leading white spaces.
745
746 =item B<RemoveTrailingWhiteSpaces>
747
748 $OutString = RemoveTrailingWhiteSpaces($InString);
749
750 Returns a string without any trailing white spaces.
751
752 =item B<RemoveLeadingAndTrailingWhiteSpaces>
753
754 $OutString = RemoveLeadingAndTrailingWhiteSpaces($InString);
755
756 Returns a string without any leading and trailing white spaces.
757
758 =item B<SplitWords>
759
760 @Words = SplitWords($Line, $Delimiter);
761
762 Returns an array I<Words> containing unquoted words generated after splitting
763 string value I<Line> containing quoted or unquoted words.
764
765 This function is used to split strings generated by JoinWords as replacement
766 for Perl's core module function Text::ParseWords::quotewords() which dumps core
767 on very long strings.
768
769 =item B<WrapText>
770
771 $OutString = WrapText($InString, [$WrapLength, $WrapDelimiter]);
772
773 Returns a wrapped string. By default, I<WrapLength> is I<40> and I<WrapDelimiter>
774 is Unix new line character.
775
776 =back
777
778 =head1 AUTHOR
779
780 Manish Sud <msud@san.rr.com>
781
782 =head1 SEE ALSO
783
784 FileUtil.pm
785
786 =head1 COPYRIGHT
787
788 Copyright (C) 2015 Manish Sud. All rights reserved.
789
790 This file is part of MayaChemTools.
791
792 MayaChemTools is free software; you can redistribute it and/or modify it under
793 the terms of the GNU Lesser General Public License as published by the Free
794 Software Foundation; either version 3 of the License, or (at your option)
795 any later version.
796
797 =cut