annotate mayachemtool/mayachemtools/lib/TextUtil.pm @ 0:a4a2ad5a214e draft default tip

Uploaded
author deepakjadmin
date Thu, 05 Nov 2015 02:37:56 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1 package TextUtil;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
2 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
3 # $RCSfile: TextUtil.pm,v $
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
4 # $Date: 2015/03/22 20:08:26 $
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
5 # $Revision: 1.45 $
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
6 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
7 # Author: Manish Sud <msud@san.rr.com>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
8 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
10 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
11 # This file is part of MayaChemTools.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
12 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
14 # the terms of the GNU Lesser General Public License as published by the Free
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
15 # Software Foundation; either version 3 of the License, or (at your option) any
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
16 # later version.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
17 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
18 # MayaChemTools is distributed in the hope that it will be useful, but without
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
19 # any warranty; without even the implied warranty of merchantability of fitness
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
20 # for a particular purpose. See the GNU Lesser General Public License for more
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
21 # details.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
22 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
23 # You should have received a copy of the GNU Lesser General Public License
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
26 # Boston, MA, 02111-1307, USA.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
27 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
28
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
29 use strict;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
30 use Exporter;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
31
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
32 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
33
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
34 @ISA = qw(Exporter);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
35 @EXPORT = qw(AddNumberSuffix ContainsWhiteSpaces GetTextLine GetTextFileDataByUniqueKey GetTextFileDataByNonUniqueKey HashCode IsEmpty IsNumberPowerOfNumber IsInteger IsPositiveInteger IsFloat IsNotEmpty IsNumerical JoinWords SplitWords QuoteAWord RemoveLeadingWhiteSpaces RemoveTrailingWhiteSpaces RemoveLeadingAndTrailingWhiteSpaces WrapText);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
36 @EXPORT_OK = qw();
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
37 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
38
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Add an English ordinal suffix ("st", "nd", "rd" or "th") to a number.
#
# Arguments:
#   $Value - Value to decorate.
#
# Returns:
#   $Value followed by its ordinal suffix when IsPositiveInteger($Value) is
#   true; otherwise $Value itself, unchanged.
#
# The suffix depends on the last two digits: anything ending in 11, 12 or 13
# takes "th" (11th, 112th, 1013th); otherwise the last digit decides
# (1 => "st", 2 => "nd", 3 => "rd", everything else => "th").
#
sub AddNumberSuffix {
  my($Value) = @_;

  # NOTE(review): IsPositiveInteger is defined elsewhere in this module; it
  # is assumed to accept only strings that are safe to use with % below.
  if (!IsPositiveInteger($Value)) {
    return $Value;
  }

  my $LastTwoDigits = $Value % 100;
  my $LastDigit = $Value % 10;

  my $Suffix = "th";
  # The "teens" exception has to be checked on the last two digits and not on
  # the whole value, so that 111, 212, 1013... are handled like 11, 12, 13.
  if ($LastTwoDigits < 11 || $LastTwoDigits > 13) {
    if ($LastDigit == 1) {
      $Suffix = "st";
    }
    elsif ($LastDigit == 2) {
      $Suffix = "nd";
    }
    elsif ($LastDigit == 3) {
      $Suffix = "rd";
    }
  }

  return "${Value}${Suffix}";
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
56
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Check a string: does it contain any white space character?
#
# Arguments:
#   $TheString - String to inspect.
#
# Returns:
#   1 when the string contains at least one space, tab, carriage return,
#   new line or form feed character; 0 otherwise, including for undefined
#   and empty strings.
#
sub ContainsWhiteSpaces {
  my($TheString) = @_;

  # Nothing to look at...
  return 0 unless defined $TheString;
  return 0 unless length $TheString;

  if ($TheString =~ /[ \t\r\n\f]/) {
    return 1;
  }
  return 0;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
67
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Read the next non-empty line from a file handle, change Windows and Mac new
# line characters to UNIX, and take off the trailing new line character.
#
# Arguments:
#   $TextFileRef - File handle (or reference to a glob) to read from.
#
# Returns:
#   The next line which is not empty after new line clean up, or an empty
#   string once the end of the file is reached.
#
# The line is read into a lexical variable instead of $_: $_ is a global and
# may be aliased to the caller's data (e.g. inside a for loop), so assigning
# to it here would silently overwrite the caller's value.
#
sub GetTextLine {
  my($TextFileRef) = @_;
  my($Line);

  # Get the next non empty line...
  LINE: while (defined($Line = <$TextFileRef>)) {
    # Change Windows and Mac new line char to UNIX...
    $Line =~ s/(\r\n)|(\r)/\n/g;

    # Take out any new line char at the end by explicitly removing it instead of using
    # chomp, which might not always work correctly on files generated on a system
    # with a value of input line separator different from the current system...
    $Line =~ s/\n$//g;

    # Doesn't hurt to chomp...
    chomp $Line;

    if (length $Line) {
      last LINE;
    }
  }

  # $Line is undefined only when the loop stopped at the end of the file...
  return defined($Line) ? $Line : '';
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
93
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Load data from a delimited text file into the specified hash reference using
# a specific column for unique data key values.
#
# This is a thin wrapper: the four arguments are handed over to
# _GetTextFileData as they are, together with the "UniqueKey" mode, and its
# return value is passed back to the caller.
#
# Arguments:
#   $TextDataFile   - Name of the text file to load.
#   $TextDataMapRef - Reference to the hash which receives the data.
#   $DataKeyColNum  - Number of the data key column, starting from 1. The
#                     first column is used when it is not defined.
#   $InDelim        - Input delimiter specification, passed through as is.
#
# File format:
#   . The lines starting with # are treated as comments and ignored.
#   . First line not starting with # must contain column labels.
#   . A data row whose number of columns doesn't match the number of column
#     labels is ignored with a warning.
#   . A data row whose data key is already present is ignored with a warning.
#
# In order to avoid dependence of data access on the specified column labels,
# the column data is loaded into hash with DataCol<Num> hash keys, where column
# number starts from 1.
#
# The format of the data structure loaded into a specified hash reference is:
#
# @{$TextDataMapRef->{DataKeys}} - Array of unique data keys, in file order
# @{$TextDataMapRef->{ColLabels}} - Array of column labels
# @{$TextDataMapRef->{DataColIDs}} - Array of data column IDs: DataCol<Num>
# $TextDataMapRef->{NumOfCols} - Number of columns
# %{$TextDataMapRef->{DataKey}} - Hash keys pair: <DataKey, DataKey>
# %{$TextDataMapRef->{DataCol<Num>}} - Hash keys pair: <DataKey, column value>
#
# Caveats:
#   . The column number starts from 1.
#   . Column data for the data key column is not loaded under a DataCol<Num> hash key.
#
sub GetTextFileDataByUniqueKey {
  # Exactly four positional arguments are forwarded; missing ones go as undef...
  my(@ForwardedArgs) = @_[0 .. 3];

  return _GetTextFileData("UniqueKey", @ForwardedArgs);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
126
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Load data from a delimited text file into the specified hash reference using
# a specific column for non-unique data key values.
#
# This is a thin wrapper: the four arguments are handed over to
# _GetTextFileData as they are, together with the "NonUniqueKey" mode, and its
# return value is passed back to the caller.
#
# Arguments:
#   $TextDataFile   - Name of the text file to load.
#   $TextDataMapRef - Reference to the hash which receives the data.
#   $DataKeyColNum  - Number of the data key column, starting from 1. The
#                     first column is used when it is not defined.
#   $InDelim        - Input delimiter specification, passed through as is.
#
# File format:
#   . The lines starting with # are treated as comments and ignored.
#   . First line not starting with # must contain column labels.
#   . A data row whose number of columns doesn't match the number of column
#     labels is ignored with a warning.
#   . Data rows sharing a data key are all kept: their column values are
#     collected, in file order, into one array per data column.
#
# In order to avoid dependence of data access on the specified column labels,
# the column data is loaded into hash with DataCol<Num> hash keys, where column
# number starts from 1.
#
# The format of the data structure loaded into a specified hash reference is:
#
# @{$TextDataMapRef->{DataKeys}} - Array of unique data keys, in file order
# @{$TextDataMapRef->{ColLabels}} - Array of column labels
# @{$TextDataMapRef->{DataColIDs}} - Array of data column IDs: DataCol<Num>
# $TextDataMapRef->{NumOfCols} - Number of columns
# %{$TextDataMapRef->{DataKey}} - Hash keys pair: <DataKey, DataKey>
# @{$TextDataMapRef->{DataCol<Num>}{<DataKey>}} - Array of column values for the data key
#
# Caveats:
#   . The column number starts from 1.
#   . Column data for the data key column is not loaded under a DataCol<Num> hash key.
#
sub GetTextFileDataByNonUniqueKey {
  # Exactly four positional arguments are forwarded; missing ones go as undef...
  my(@ForwardedArgs) = @_[0 .. 3];

  return _GetTextFileData("NonUniqueKey", @ForwardedArgs);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
159
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Load text file data using a unique or non-unique data key column...
#
# Arguments:
#   $DataKeyMode    - "UniqueKey" (case insensitive) to keep one row per data
#                     key; any other value collects all rows of a data key.
#   $TextDataFile   - Name of the text file to load.
#   $TextDataMapRef - Reference to the hash which receives the data; it is
#                     emptied first.
#   $DataKeyColNum  - Number of the data key column, starting from 1. The
#                     first column is used when it is not defined.
#   $InDelim        - "semicolon" (case insensitive) to split columns on ";";
#                     anything else, including undef, splits on ",". Files
#                     whose name ends in .tsv are always split on tab.
#
# Returns:
#   The status of closing the file after the data is loaded. Nothing is
#   returned, after a warning, when $DataKeyColNum doesn't correspond to a
#   column in the file. Dies when the file can't be opened.
#
# Data loaded into $TextDataMapRef:
#   DataKeys         - Array of unique data keys, in file order
#   ColLabels        - Array of column labels
#   DataColIDs       - Array of DataCol<Num> IDs for all columns except the key column
#   NumOfCols        - Number of columns
#   DataKey          - Hash: data key => data key
#   DataCol<Num>     - Hash: data key => column value (unique key mode), or
#                      data key => array of column values (non-unique key mode)
#
# Progress and line count summary are printed to the currently selected
# output handle; ignored rows are reported using warn.
#
sub _GetTextFileData {
  my($DataKeyMode, $TextDataFile, $TextDataMapRef, $DataKeyColNum, $InDelim) = @_;

  # quotewords is called using its full name so that this function doesn't
  # depend on Text::ParseWords being imported somewhere else in the file...
  require Text::ParseWords;

  print "\nProcessing text data file $TextDataFile...\n";

  my $UniqueDataKeyMode = ($DataKeyMode =~ /^UniqueKey$/i) ? 1 : 0;

  # Setup default values...
  $DataKeyColNum = 1 unless defined $DataKeyColNum;

  # Turn the delimiter specification into the delimiter itself. The .tsv
  # check is anchored only at the end: it is a file name extension test...
  if ($TextDataFile =~ /\.tsv$/i) {
    $InDelim = "\t";
  }
  elsif (defined($InDelim) && $InDelim =~ /^semicolon$/i) {
    $InDelim = "\;";
  }
  else {
    $InDelim = "\,";
  }

  my($LineCount, $IgnoredLineCount) = (0, 0);

  # Three argument open: the file name is never interpreted as a mode or a
  # command. The lexical handle gets closed on every way out of the function...
  open(my $TextDataFH, '<', $TextDataFile) or die "Couldn't open $TextDataFile: $! ...";

  # Skip lines up to column labels. GetTextLine returns an empty string at the
  # end of the file, so test the length: a line containing just 0 is valid...
  my($Line);
  LINE: while (length($Line = TextUtil::GetTextLine($TextDataFH))) {
    $LineCount++;
    if ($Line !~ /^#/) {
      last LINE;
    }
    $IgnoredLineCount++;
  }

  # Initialize data map...
  %{$TextDataMapRef} = ();
  @{$TextDataMapRef->{DataKeys}} = ();
  @{$TextDataMapRef->{ColLabels}} = ();
  @{$TextDataMapRef->{DataColIDs}} = ();
  $TextDataMapRef->{NumOfCols} = undef;

  # Process column labels...
  my @ColLabels = Text::ParseWords::quotewords($InDelim, 0, $Line);
  my $NumOfCols = @ColLabels;

  if ($DataKeyColNum < 1 || $DataKeyColNum > $NumOfCols) {
    warn "Warning: Ignoring text data file $TextDataFile: Invalid data key column number, $DataKeyColNum, specified. It must be > 0 and <= $NumOfCols, number of columns in the text file ...";
    close $TextDataFH;
    return;
  }
  my $DataKeyColIndex = $DataKeyColNum - 1;

  $TextDataMapRef->{NumOfCols} = $NumOfCols;
  push @{$TextDataMapRef->{ColLabels}}, @ColLabels;

  # Set up column data IDs for tracking the data: all columns except the data
  # key column...
  my @DataColNums = grep { $_ != $DataKeyColNum } (1 .. $NumOfCols);
  my @DataColIDs = map { "DataCol$_" } @DataColNums;
  push @{$TextDataMapRef->{DataColIDs}}, @DataColIDs;

  # Initialize column data hash...
  %{$TextDataMapRef->{DataKey}} = ();
  for my $ColID (@DataColIDs) {
    %{$TextDataMapRef->{$ColID}} = ();
  }

  LINE: while (length($Line = TextUtil::GetTextLine($TextDataFH))) {
    $LineCount++;
    if ($Line =~ /^#/) {
      $IgnoredLineCount++;
      next LINE;
    }

    my @LineWords = Text::ParseWords::quotewords($InDelim, 0, $Line);
    if (@LineWords != $NumOfCols) {
      $IgnoredLineCount++;
      # Report the number of fields and not the fields themselves...
      my $NumOfLineWords = @LineWords;
      warn "Warning: The number of data fields, $NumOfLineWords, in $TextDataFile must be $NumOfCols.\nIgnoring line number $LineCount: $Line...\n";
      next LINE;
    }
    my $DataKey = $LineWords[$DataKeyColIndex];
    my $DataKeyAlreadyPresent = exists $TextDataMapRef->{DataKey}{$DataKey};

    if ($UniqueDataKeyMode && $DataKeyAlreadyPresent) {
      $IgnoredLineCount++;
      warn "Warning: The data key, $DataKey, in data column key number, $DataKeyColNum, is already present.\nIgnoring line number $LineCount: $Line...\n";
      next LINE;
    }

    if (!$DataKeyAlreadyPresent) {
      push @{$TextDataMapRef->{DataKeys}}, $DataKey;
      $TextDataMapRef->{DataKey}{$DataKey} = $DataKey;

      if (!$UniqueDataKeyMode) {
        # First row for this data key: start an empty list for each data column...
        for my $ColID (@DataColIDs) {
          @{$TextDataMapRef->{$ColID}{$DataKey}} = ();
        }
      }
    }

    # Track column data values...
    for my $ColIndex (0 .. $#DataColNums) {
      my $ColID = $DataColIDs[$ColIndex];
      my $ColValue = $LineWords[$DataColNums[$ColIndex] - 1];

      if ($UniqueDataKeyMode) {
        $TextDataMapRef->{$ColID}{$DataKey} = $ColValue;
      }
      else {
        push @{$TextDataMapRef->{$ColID}{$DataKey}}, $ColValue;
      }
    }
  }

  print "\nTotal number of lines in file $TextDataFile: $LineCount\n";
  print "Total number of lines ignored: $IgnoredLineCount\n";

  return close($TextDataFH);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
301
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
302 # Returns a 32 bit integer hash code using One-at-a-time algorithm By Bob Jenkins [Ref 38]. It's also implemented in
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
303 # Perl for internal hash keys in hv.h include file.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
304 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
305 # It's not clear how to force Perl perform unsigned integer arithmetic irrespective of the OS/Platform and
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
306 # the value of use64bitint flag used during its compilation.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
307 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
308 # In order to generate a consistent 32 bit hash code across OS/platforms, the following methodology appears
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
309 # to work:
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
310 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
311 # o Use MaxHashCodeMask to retrieve appropriate bits after left shifting by bit operators and additions
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
312 # o Stay away from "use integer" to avoid signed integer arithmetic for bit operators
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
313 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
314 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
315 # MaxHashCodeMask (2147483647) corresponds to the maximum value which can be stored in 31 bits
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
316 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Maximum value which can be stored in 31 bits (2147483647); HashCode uses it
# as a bit mask to cut its intermediate values back whenever they exceed it.
my $MaxHashCodeMask = 2**31 - 1;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
319
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
320 sub HashCode {
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
321 my($String) = @_;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
322 my($HashCode, $Value, $ShiftedHashCode);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
323
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
324 $HashCode = 0;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
325 for $Value (unpack('C*', $String)) {
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
326 $HashCode += $Value;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
327
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
328 $ShiftedHashCode = $HashCode << 10;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
329 if ($ShiftedHashCode > $MaxHashCodeMask) {
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
330 $ShiftedHashCode = $ShiftedHashCode & $MaxHashCodeMask;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
331 }
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
332
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
333 $HashCode += $ShiftedHashCode;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
334 if ($HashCode > $MaxHashCodeMask) {
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
335 $HashCode = $HashCode & $MaxHashCodeMask;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
336 }
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
337
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
338 $HashCode ^= ($HashCode >> 6);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
339 }
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
340
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
341 $ShiftedHashCode = $HashCode << 3;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
342 if ($ShiftedHashCode > $MaxHashCodeMask) {
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
343 $ShiftedHashCode = $ShiftedHashCode & $MaxHashCodeMask;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
344 }
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
345
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
346 $HashCode += $ShiftedHashCode;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
347 if ($HashCode > $MaxHashCodeMask) {
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
348 $HashCode = $HashCode & $MaxHashCodeMask;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
349 }
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
350 $HashCode ^= ($HashCode >> 11);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
351
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
352 $ShiftedHashCode = $HashCode << 15;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
353 if ($ShiftedHashCode > $MaxHashCodeMask) {
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
354 $ShiftedHashCode = $ShiftedHashCode & $MaxHashCodeMask;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
355 }
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
356
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
357 $HashCode += $ShiftedHashCode;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
358 if ($HashCode > $MaxHashCodeMask) {
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
359 $HashCode = $HashCode & $MaxHashCodeMask;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
360 }
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
361 return $HashCode;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
362 }
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
363
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
364 # Check out the string: Is it undefined or does it have a zero length?
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Report whether a string is empty.
#
# Parameters:
#   $TheString - Value to check.
#
# Returns:
#   1 for an undefined or zero length string; otherwise, 0.
#
sub IsEmpty {
  my($TheString) = @_;

  return 1 if !defined($TheString);

  return length($TheString) ? 0 : 1;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
373
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
374 # Is first specified number power of second specified number...
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Check whether the first number is an integral power of the second number.
#
# The exponent is calculated as log(FirstNum)/log(SecondNum) and handed over
# to IsInteger, which checks its string representation.
#
# Parameters:
#   $FirstNum  - Number to test.
#   $SecondNum - Base number.
#
# Returns:
#   1 when the calculated exponent is an integer; otherwise, 0. Undefined,
#   zero or negative values return 0 instead of terminating the program
#   inside log.
#
sub IsNumberPowerOfNumber {
  my($FirstNum, $SecondNum) = @_;
  my($PowerValue, $LogOfBase);

  # log dies for zero and negative values...
  if (!(defined($FirstNum) && defined($SecondNum))) {
    return 0;
  }
  if (!($FirstNum > 0 && $SecondNum > 0)) {
    return 0;
  }

  # Base 1 would lead to a division by zero; 1 is the only power of 1...
  $LogOfBase = log($SecondNum);
  if ($LogOfBase == 0) {
    return ($FirstNum == 1) ? 1 : 0;
  }

  $PowerValue = log($FirstNum)/$LogOfBase;

  return IsInteger($PowerValue) ? 1 : 0;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
383
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
384 # Check out the string: Is it an integer?
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Check whether a string corresponds to an integer.
#
# Leading and trailing white space characters (space, tab, carriage return,
# new line and form feed) are ignored and a single leading + or - sign is
# allowed. At least one digit must be present: a lone sign or a string
# containing only white spaces is not an integer.
#
# Parameters:
#   $TheString - Value to check.
#
# Returns:
#   1 for an integer; otherwise, 0. Undefined and empty strings return 0.
#
sub IsInteger {
  my($TheString) = @_;

  if (!(defined($TheString) && length($TheString))) {
    return 0;
  }

  return ($TheString =~ /\A[ \t\r\n\f]*[+-]?[0-9]+[ \t\r\n\f]*\z/) ? 1 : 0;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
396
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
397 # Check out the string: Is it an integer with value > 0?
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Check whether a string corresponds to an integer with a value greater than
# zero.
#
# Parameters:
#   $TheString - Value to check.
#
# Returns:
#   1 when IsInteger accepts the string and its numerical value is > 0;
#   otherwise, 0.
#
sub IsPositiveInteger {
  my($TheString) = @_;

  if (!IsInteger($TheString)) {
    return 0;
  }
  if ($TheString > 0) {
    return 1;
  }
  return 0;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
406
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
407
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
408 # Check out the string: Is it a float?
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Check whether a string corresponds to a float.
#
# Accepted format: optional + or - sign; digits with an optional decimal point
# or a decimal point followed by digits; optional exponent made up of e or E,
# an optional sign and digits. Integers are accepted as well. Leading and
# trailing white space characters (space, tab, carriage return, new line and
# form feed) are ignored.
#
# Examples of accepted values: 5, -0.25, .5, 5., 1e5, 1.5E-3
# Examples of rejected values: ., e, 1e, 1.2.3, +
#
# Parameters:
#   $TheString - Value to check.
#
# Returns:
#   1 for a float; otherwise, 0. Undefined and empty strings return 0.
#
sub IsFloat {
  my($TheString) = @_;

  if (!(defined($TheString) && length($TheString))) {
    return 0;
  }

  return ($TheString =~ /\A[ \t\r\n\f]*[+-]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][+-]?[0-9]+)?[ \t\r\n\f]*\z/) ? 1 : 0;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
420
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
421 # Check out the string: Is it defined and has a non zero length?
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Report whether a string is not empty.
#
# Parameters:
#   $TheString - Value to check.
#
# Returns:
#   1 for a defined string with a non zero length; otherwise, 0.
#
sub IsNotEmpty {
  my($TheString) = @_;

  return (defined($TheString) && length($TheString)) ? 1 : 0;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
430
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
431 # Check out the string: Does it only contain numerical data?
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Check whether a string contains only numerical data.
#
# Accepted format: optional + or - sign; digits with an optional decimal point
# or a decimal point followed by digits; optional exponent made up of e or E,
# an optional sign and digits. Leading and trailing white space characters
# (space, tab, carriage return, new line and form feed) are ignored.
#
# Examples of accepted values: 5, -0.25, .5, 5., 1e5, 1.5E-3
# Examples of rejected values: ., e, 1e, 1.2.3, +
#
# Parameters:
#   $TheString - Value to check.
#
# Returns:
#   1 for a number; otherwise, 0. Undefined and empty strings return 0.
#
sub IsNumerical {
  my($TheString) = @_;

  if (!(defined($TheString) && length($TheString))) {
    return 0;
  }

  return ($TheString =~ /\A[ \t\r\n\f]*[+-]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][+-]?[0-9]+)?[ \t\r\n\f]*\z/) ? 1 : 0;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
443
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
444 # Join different words using delimiter and quote parameters. And return as
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
445 # a string value.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Join words into a string using the specified delimiter and optional quotes.
#
# Parameters:
#   $Words - Reference to an array of words.
#   $Delim - Delimiter placed between the words.
#   $Quote - True value to surround each word with double quotes.
#
# Returns:
#   Joined string. An empty word list returns an empty string. Undefined and
#   zero length words are written out as empty, and quoted when requested.
#
sub JoinWords {
  my($Words, $Delim, $Quote) = @_;

  return "" unless @$Words;

  my $QuoteChar = $Quote ? "\"" : "";

  my @QuotedWords = ();
  for my $Word (@$Words) {
    my $WordText = (defined($Word) && length($Word)) ? $Word : "";
    push @QuotedWords, $QuoteChar . $WordText . $QuoteChar;
  }

  return join($Delim, @QuotedWords);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
458
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
459 # Split string value containing quoted or unquoted words in to an array containing
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
460 # unquoted words.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
461 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
462 # This function is used to split strings generated by JoinWords.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
463 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Split a string containing quoted or unquoted words into a list of unquoted
# words. It is used to split strings generated by JoinWords.
#
# Parameters:
#   $Line  - String to split.
#   $Delim - Delimiter between the words; it is interpolated into the pattern
#            used by split.
#
# Returns:
#   List of words. An undefined or zero length line returns an empty list; a
#   line containing just 0 returns the single word 0.
#
sub SplitWords {
  my($Line, $Delim) = @_;

  # Check the length instead of the truth value: "0" is a valid line...
  if (!defined($Line) || !length($Line)) {
    return ();
  }

  # Is it a quoted string?
  if ($Line =~ /^\"/) {
    # Take out first and last quote...
    $Line =~ s/^\"//; $Line =~ s/\"$//;

    # Quoted words are separated by quote, delimiter and quote...
    $Delim = "\"$Delim\"";
  }
  return split /$Delim/, $Line;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
480
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
481 # Based on quote parameter, figure out what to do
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Surround a word with double quotes based on the quote parameter.
#
# Parameters:
#   $Word  - Word to process.
#   $Quote - True value to surround the word with double quotes.
#
# Returns:
#   Word with or without quotes. An undefined or zero length word is treated
#   as an empty string; the word 0 is kept as is.
#
sub QuoteAWord {
  my($Word, $Quote) = @_;
  my($QuotedWord);

  # Check the length instead of the truth value: "0" is a valid word...
  $QuotedWord = (defined($Word) && length($Word)) ? $Word : "";
  if ($Quote) {
    $QuotedWord = "\"$QuotedWord\"";
  }
  return $QuotedWord;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
495
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
496 # Remove leading white space characters from the string...
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Remove leading white space characters (space, tab, carriage return, new line
# and form feed) from a string.
#
# Parameters:
#   $InString - String to process.
#
# Returns:
#   String without leading white spaces. Strings containing new lines in the
#   middle are handled as well. An undefined or zero length string is returned
#   as is.
#
sub RemoveLeadingWhiteSpaces {
  my($InString) = @_;
  my($OutString);

  $OutString = $InString;
  if (defined($OutString) && length($OutString)) {
    $OutString =~ s/\A[ \t\r\n\f]+//;
  }
  return $OutString;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
507
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
508 # Remove Trailing white space characters from the string...
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Remove trailing white space characters (space, tab, carriage return, new
# line and form feed) from a string.
#
# Parameters:
#   $InString - String to process.
#
# Returns:
#   String without trailing white spaces. Strings containing new lines in the
#   middle are handled as well. An undefined or zero length string is returned
#   as is.
#
sub RemoveTrailingWhiteSpaces {
  my($InString) = @_;
  my($OutString);

  $OutString = $InString;
  if (defined($OutString) && length($OutString)) {
    $OutString =~ s/[ \t\r\n\f]+\z//;
  }
  return $OutString;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
519
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
520 # Remove both leading and trailing white space characters from the string...
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Remove both leading and trailing white space characters (space, tab,
# carriage return, new line and form feed) from a string.
#
# Parameters:
#   $InString - String to process.
#
# Returns:
#   String without leading and trailing white spaces. Strings containing new
#   lines in the middle are handled as well. An undefined or zero length
#   string is returned as is.
#
sub RemoveLeadingAndTrailingWhiteSpaces {
  my($InString) = @_;
  my($OutString);

  $OutString = $InString;
  if (defined($OutString) && length($OutString)) {
    $OutString =~ s/\A[ \t\r\n\f]+//;
    $OutString =~ s/[ \t\r\n\f]+\z//;
  }
  return $OutString;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
531
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
532 # Wrap text string...
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Wrap a text string by inserting a delimiter after every WrapLength
# characters.
#
# Parameters:
#   $InString      - String to wrap.
#   $WrapLength    - Optional number of characters in each piece. Default: 40.
#   $WrapDelimiter - Optional delimiter placed between the pieces. Default:
#                    new line.
#
# Returns:
#   Wrapped string. The string is returned as is when it is undefined, empty,
#   not longer than the wrap length, or when the wrap length is less than 1.
#
sub WrapText {
  my($InString, $WrapLength, $WrapDelimiter) = @_;
  my(@StringPieces, $Index, $Length);

  # Use default values for parameters which are not specified...
  $WrapLength = 40 unless defined($WrapLength);
  $WrapDelimiter = "\n" unless defined($WrapDelimiter);

  if (!defined($InString) || !length($InString)) {
    return $InString;
  }

  # A wrap length less than 1 would never advance through the string...
  $Length = length($InString);
  if ($WrapLength < 1 || $Length <= $WrapLength) {
    return $InString;
  }

  # substr takes care of the last piece being shorter than the wrap length...
  @StringPieces = ();
  for ($Index = 0; $Index < $Length; $Index += $WrapLength) {
    push @StringPieces, substr($InString, $Index, $WrapLength);
  }

  return join($WrapDelimiter, @StringPieces);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
573
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
574 1;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
575
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
576 __END__
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
577
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
578 =head1 NAME
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
579
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
580 TextUtil
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
581
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
582 =head1 SYNOPSIS
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
583
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
584 use TextUtil;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
585
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
586 use TextUtil qw(:all);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
587
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
588 =head1 DESCRIPTION
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
589
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
590 B<TextUtil> module provides the following functions:
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
591
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
592 AddNumberSuffix, ContainsWhiteSpaces, GetTextFileDataByNonUniqueKey,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
593 GetTextFileDataByUniqueKey, GetTextLine, HashCode, IsEmpty, IsFloat, IsInteger,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
594 IsNotEmpty, IsNumberPowerOfNumber, IsNumerical, IsPositiveInteger, JoinWords,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
595 QuoteAWord, RemoveLeadingAndTrailingWhiteSpaces, RemoveLeadingWhiteSpaces,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
596 RemoveTrailingWhiteSpaces, SplitWords, WrapText
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
597
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
598 =head1 FUNCTIONS
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
599
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
600 =over 4
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
601
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
602 =item B<AddNumberSuffix>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
603
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
604 $NumberWithSuffix = AddNumberSuffix($IntegerValue);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
605
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
606 Returns number with appropriate suffix: 0, 1st, 2nd, 3rd, 4th, and so on.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
607
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
608 =item B<ContainsWhiteSpaces>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
609
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
610 $Status = ContainsWhiteSpaces($TheString);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
611
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
612 Returns 1 or 0 based on whether the string contains any white spaces.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
613
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
614 =item B<GetTextLine>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
615
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
616 $Line = GetTextLine(\*TEXTFILE);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
617
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
618 Reads next line from an already opened text file, takes out any carriage return,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
619 and returns it as a string. NULL is returned for EOF.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
620
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
621 =item B<GetTextFileDataByNonUniqueKey>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
622
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
623 GetTextFileDataByNonUniqueKey($TextDataFile, $TextDataMapRef,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
624 $DataKeyColNum, $InDelim);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
625
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
626 Load data from a text file into the specified hash reference using a specific
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
627 column for non-unique data key values.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
628
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
629 The lines starting with # are treated as comments and ignored. First line
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
630 not starting with # must contain column labels and the number of columns in
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
631 all other data rows must match the number of column labels.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
632
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
633 The first column is assumed to contain data key value by default; all other columns
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
634 contain data as indicated in their column labels.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
635
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
636 In order to avoid dependence of data access on the specified column labels, the
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
637 column data is loaded into hash with DataCol<Num> hash keys, where column numbers
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
638 start from 1. The data key column is not available as DataCol<Num> hash key.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
639
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
640 The format of the data structure loaded into a specified hash reference is:
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
641
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
642 @{$TextDataMapRef->{DataKeys}} - Array of unique data keys
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
643 @{$TextDataMapRef->{ColLabels}} - Array of column labels
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
644 @{$TextDataMapRef->{DataColIDs}} - Array of data column IDs
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
645 $TextDataMapRef->{NumOfCols} - Number of columns
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
646 %{$TextDataMapRef->{DataKey}} - Hash keys pair: <DataKey, DataKey>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
647 @{$TextDataMapRef->{DataCol<Num>}} - Hash keys pair with data as an array:
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
648 <DataCol<Num>, DataKey>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
649
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
650 =item B<GetTextFileDataByUniqueKey>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
651
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
652 GetTextFileDataByUniqueKey($TextDataFile, $TextDataMapRef, $DataKeyColNum,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
653 $InDelim);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
654
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
655 Load data from a text file into the specified hash reference using a specific
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
656 column for unique data key values.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
657
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
658 The lines starting with # are treated as comments and ignored. First line
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
659 not starting with # must contain column labels and the number of columns in
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
660 all other data rows must match the number of column labels.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
661
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
662 The first column is assumed to contain data key value by default; all other columns
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
663 contain data as indicated in their column labels.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
664
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
665 In order to avoid dependence of data access on the specified column labels, the
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
666 column data is loaded into hash with DataCol<Num> hash keys, where column numbers
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
667 start from 1. The data key column is not available as DataCol<Num> hash key.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
668
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
669 The format of the data structure loaded into a specified hash reference is:
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
670
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
671 @{$TextDataMapRef->{DataKeys}} - Array of unique data keys
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
672 @{$TextDataMapRef->{ColLabels}} - Array of column labels
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
673 @{$TextDataMapRef->{DataColIDs}} - Array of data column IDs
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
674 $TextDataMapRef->{NumOfCols} - Number of columns
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
675 %{$TextDataMapRef->{DataKey}} - Hash keys pair: <DataKey, DataKey>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
676 %{$TextDataMapRef->{DataCol<Num>}} - Hash keys pair: <DataCol<Num>, DataKey>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
677
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
678 =item B<HashCode>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
679
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
680 $HashCode = HashCode($TheString);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
681
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
682 Returns an integer hash code, limited to 31 bits (maximum value 2147483647), using One-at-a-time algorithm by Bob Jenkins [Ref 38].
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
683 It's also implemented in Perl for internal hash keys in hv.h include file.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
684
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
685 =item B<IsEmpty>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
686
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
687 $Status = IsEmpty($TheString);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
688
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
689 Returns 1 or 0 based on whether the string is empty.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
690
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
691 =item B<IsInteger>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
692
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
693 $Status = IsInteger($TheString);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
694
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
695 Returns 1 or 0 based on whether the string is an integer.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
696
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
697 =item B<IsPositiveInteger>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
698
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
699 $Status = IsPositiveInteger($TheString);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
700
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
701 Returns 1 or 0 based on whether the string is a positive integer.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
702
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
703 =item B<IsFloat>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
704
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
705 $Status = IsFloat($TheString);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
706
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
707 Returns 1 or 0 based on whether the string is a float.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
708
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
709 =item B<IsNotEmpty>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
710
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
711 $Status = IsNotEmpty($TheString);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
712
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
713 Returns 1 or 0 based on whether the string is not empty.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
714
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
715 =item B<IsNumerical>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
716
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
717 $Status = IsNumerical($TheString);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
718
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
719 Returns 1 or 0 based on whether the string is a number.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
720
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
721 =item B<IsNumberPowerOfNumber>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
722
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
723 $Status = IsNumberPowerOfNumber($FirstNum, $SecondNum);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
724
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
725 Returns 1 or 0 based on whether the first number is a power of second number.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
726
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
727 =item B<JoinWords>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
728
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
729 $JoinedWords = JoinWords($Words, $Delim, $Quote);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
730
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
731 Joins different words using delimiter and quote parameters, and returns it
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
732 as a string.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
733
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
734 =item B<QuoteAWord>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
735
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
736 $QuotedWord = QuoteAWord($Word, $Quote);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
737
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
738 Returns a quoted string based on I<Quote> value.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
739
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
740 =item B<RemoveLeadingWhiteSpaces>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
741
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
742 $OutString = RemoveLeadingWhiteSpaces($InString);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
743
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
744 Returns a string without any leading white spaces.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
745
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
746 =item B<RemoveTrailingWhiteSpaces>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
747
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
748 $OutString = RemoveTrailingWhiteSpaces($InString);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
749
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
750 Returns a string without any trailing white spaces.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
751
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
752 =item B<RemoveLeadingAndTrailingWhiteSpaces>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
753
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
754 $OutString = RemoveLeadingAndTrailingWhiteSpaces($InString);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
755
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
756 Returns a string without any leading and trailing white spaces.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
757
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
758 =item B<SplitWords>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
759
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
760 @Words = SplitWords($Line, $Delimiter);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
761
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
762 Returns an array I<Words> containing unquoted words generated after splitting
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
763 string value I<Line> containing quoted or unquoted words.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
764
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
765 This function is used to split strings generated by JoinWords as replacement
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
766 for Perl's core module function Text::ParseWords::quotewords() which dumps core
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
767 on very long strings.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
768
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
769 =item B<WrapText>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
770
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
771 $OutString = WrapText($InString, [$WrapLength, $WrapDelimiter]);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
772
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
773 Returns a wrapped string. By default, I<WrapLength> is I<40> and I<WrapDelimiter>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
774 is Unix new line character.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
775
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
776 =back
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
777
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
778 =head1 AUTHOR
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
779
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
780 Manish Sud <msud@san.rr.com>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
781
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
782 =head1 SEE ALSO
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
783
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
784 FileUtil.pm
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
785
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
786 =head1 COPYRIGHT
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
787
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
788 Copyright (C) 2015 Manish Sud. All rights reserved.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
789
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
790 This file is part of MayaChemTools.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
791
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
792 MayaChemTools is free software; you can redistribute it and/or modify it under
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
793 the terms of the GNU Lesser General Public License as published by the Free
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
794 Software Foundation; either version 3 of the License, or (at your option)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
795 any later version.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
796
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
797 =cut