comparison docs/scripts/man1/ExtractFromTextFiles.1 @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 .\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.22)
2 .\"
3 .\" Standard preamble:
4 .\" ========================================================================
5 .de Sp \" Vertical space (when we can't use .PP)
6 .if t .sp .5v
7 .if n .sp
8 ..
9 .de Vb \" Begin verbatim text
10 .ft CW
11 .nf
12 .ne \\$1
13 ..
14 .de Ve \" End verbatim text
15 .ft R
16 .fi
17 ..
18 .\" Set up some character translations and predefined strings. \*(-- will
19 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
20 .\" double quote, and \*(R" will give a right double quote. \*(C+ will
21 .\" give a nicer C++. Capital omega is used to do unbreakable dashes and
22 .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
23 .\" nothing in troff, for use with C<>.
24 .tr \(*W-
25 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
26 .ie n \{\
27 . ds -- \(*W-
28 . ds PI pi
29 . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
30 . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
31 . ds L" ""
32 . ds R" ""
33 . ds C` ""
34 . ds C' ""
35 'br\}
36 .el\{\
37 . ds -- \|\(em\|
38 . ds PI \(*p
39 . ds L" ``
40 . ds R" ''
41 'br\}
42 .\"
43 .\" Escape single quotes in literal strings from groff's Unicode transform.
44 .ie \n(.g .ds Aq \(aq
45 .el .ds Aq '
46 .\"
47 .\" If the F register is turned on, we'll generate index entries on stderr for
48 .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
49 .\" entries marked with X<> in POD. Of course, you'll have to process the
50 .\" output yourself in some meaningful fashion.
51 .ie \nF \{\
52 . de IX
53 . tm Index:\\$1\t\\n%\t"\\$2"
54 ..
55 . nr % 0
56 . rr F
57 .\}
58 .el \{\
59 . de IX
60 ..
61 .\}
62 .\"
63 .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
64 .\" Fear. Run. Save yourself. No user-serviceable parts.
65 . \" fudge factors for nroff and troff
66 .if n \{\
67 . ds #H 0
68 . ds #V .8m
69 . ds #F .3m
70 . ds #[ \f1
71 . ds #] \fP
72 .\}
73 .if t \{\
74 . ds #H ((1u-(\\\\n(.fu%2u))*.13m)
75 . ds #V .6m
76 . ds #F 0
77 . ds #[ \&
78 . ds #] \&
79 .\}
80 . \" simple accents for nroff and troff
81 .if n \{\
82 . ds ' \&
83 . ds ` \&
84 . ds ^ \&
85 . ds , \&
86 . ds ~ ~
87 . ds /
88 .\}
89 .if t \{\
90 . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
91 . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
92 . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
93 . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
94 . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
95 . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
96 .\}
97 . \" troff and (daisy-wheel) nroff accents
98 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
99 .ds 8 \h'\*(#H'\(*b\h'-\*(#H'
100 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
101 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
102 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
103 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
104 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
105 .ds ae a\h'-(\w'a'u*4/10)'e
106 .ds Ae A\h'-(\w'A'u*4/10)'E
107 . \" corrections for vroff
108 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
109 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
110 . \" for low resolution devices (crt and lpr)
111 .if \n(.H>23 .if \n(.V>19 \
112 \{\
113 . ds : e
114 . ds 8 ss
115 . ds o a
116 . ds d- d\h'-1'\(ga
117 . ds D- D\h'-1'\(hy
118 . ds th \o'bp'
119 . ds Th \o'LP'
120 . ds ae ae
121 . ds Ae AE
122 .\}
123 .rm #[ #] #H #V #F C
124 .\" ========================================================================
125 .\"
126 .IX Title "EXTRACTFROMTEXTFILES 1"
127 .TH EXTRACTFROMTEXTFILES 1 "2015-03-29" "perl v5.14.2" "MayaChemTools"
128 .\" For nroff, turn off justification. Always turn off hyphenation; it makes
129 .\" way too many mistakes in technical documents.
130 .if n .ad l
131 .nh
132 .SH "NAME"
133 ExtractFromTextFiles.pl \- Extract specific data from TextFile(s)
134 .SH "SYNOPSIS"
135 .IX Header "SYNOPSIS"
136 ExtractFromTextFiles.pl TextFile(s)...
137 .PP
138 ExtractFromTextFiles.pl [\fB\-c, \-\-colmode\fR colnum | collabel] [\fB\-\-categorycol \fR number | string]
139 [\fB\-\-columns\fR \*(L"colnum,[colnum]...\*(R" | \*(L"collabel,[collabel]...\*(R"] [\fB\-h, \-\-help\fR]
140 [\fB\-\-indelim\fR \fIcomma | semicolon\fR] [\fB\-m, \-\-mode \fR \fIcolumns | rows | categories\fR]
141 [\fB\-o, \-\-overwrite\fR] [\fB\-\-outdelim\fR \fIcomma | tab | semicolon\fR] [\fB\-q, \-\-quote\fR \fIyes | no\fR]
142 [\fB\-\-rows\fR \*(L"colid,value,criteria...\*(R" | \*(L"colid,value...\*(R" | \*(L"colid,mincolvalue,maxcolvalue\*(R" | \*(L"rownum,rownum,...\*(R" | colid | \*(L"minrownum,maxrownum\*(R"]
143 [ \fB\-\-rowsmode\fR rowsbycolvalue | rowsbycolvaluelist | rowsbycolvaluerange | rowbymincolvalue | rowbymaxcolvalue | rownums | rownumrange]
144 [\fB\-r, \-\-root\fR \fIrootname\fR] [\fB\-w, \-\-workingdir\fR \fIdirname\fR] TextFile(s)...
145 .SH "DESCRIPTION"
146 .IX Header "DESCRIPTION"
147 Extract column(s)/row(s) data from \fITextFile(s)\fR identified by column numbers or labels. Or categorize
148 data using a specified column category. During categorization, a summary text file is
149 generated containing category name and count; an additional text file, containing data for
150 each category, is also generated. The file names are separated by space. The
151 valid file extensions are \fI.csv\fR and \fI.tsv\fR for comma/semicolon and tab delimited
152 text files respectively. All other file names are ignored. All the text files in a
153 current directory can be specified by \fI*.csv\fR, \fI*.tsv\fR, or the current directory
154 name. The \fB\-\-indelim\fR option determines the format of \fITextFile(s)\fR. Any file
155 which doesn't correspond to the format indicated by \fB\-\-indelim\fR option is ignored.
156 .SH "OPTIONS"
157 .IX Header "OPTIONS"
158 .IP "\fB\-c, \-\-colmode\fR \fIcolnum | collabel\fR" 4
159 .IX Item "-c, --colmode colnum | collabel"
160 Specify how columns are identified in \fITextFile(s)\fR: using column number or column
161 label. Possible values: \fIcolnum or collabel\fR. Default value: \fIcolnum\fR.
162 .IP "\fB\-\-categorycol \fR \fInumber | string\fR" 4
163 .IX Item "--categorycol number | string"
164 Column used to categorize data. Default value: First column.
165 .Sp
166 For \fIcolnum\fR value of \fB\-c, \-\-colmode\fR option, input value is a column number.
167 Example: \fI1\fR.
168 .Sp
169 For \fIcollabel\fR value of \fB\-c, \-\-colmode\fR option, input value is a column label.
170 Example: \fIMol_ID\fR.
171 .ie n .IP "\fB\-\-columns\fR \fI""colnum,[colnum]..."" | ""collabel,[collabel]...""\fR" 4
172 .el .IP "\fB\-\-columns\fR \fI``colnum,[colnum]...'' | ``collabel,[collabel]...''\fR" 4
173 .IX Item "--columns colnum,[colnum]... | collabel,[collabel]..."
174 List of comma delimited columns to extract. Default value: First column.
175 .Sp
176 For \fIcolnum\fR value of \fB\-c, \-\-colmode\fR option, input values format is:
177 \&\fIcolnum,colnum,...\fR. Example: \fI1,3,5\fR
178 .Sp
179 For \fIcollabel\fR value of \fB\-c, \-\-colmode\fR option, input values format is:
180 \&\fIcollabel,collabel,..\fR. Example: \fIMol_ID,MolWeight\fR
181 .IP "\fB\-h, \-\-help\fR" 4
182 .IX Item "-h, --help"
183 Print this help message.
184 .IP "\fB\-\-indelim\fR \fIcomma | semicolon\fR" 4
185 .IX Item "--indelim comma | semicolon"
186 Input delimiter for \s-1CSV\s0 \fITextFile(s)\fR. Possible values: \fIcomma or semicolon\fR.
187 Default value: \fIcomma\fR. For \s-1TSV\s0 files, this option is ignored and \fItab\fR is used as a
188 delimiter.
189 .IP "\fB\-m, \-\-mode \fR \fIcolumns | rows | categories\fR" 4
190 .IX Item "-m, --mode columns | rows | categories"
191 Specify what to extract from \fITextFile(s)\fR. Possible values: \fIcolumns, rows,
192 or categories\fR. Default value: \fIcolumns\fR.
193 .Sp
194 For \fIcolumns\fR mode, data for appropriate columns specified by \fB\-\-columns\fR option
195 is extracted from \fITextFile(s)\fR and placed into new text files.
196 .Sp
197 For \fIrows\fR mode, appropriate rows specified in conjunction with \fB\-\-rowsmode\fR and
198 \&\fB\-\-rows\fR options are extracted from \fITextFile(s)\fR and placed into new text files.
199 .Sp
200 For \fIcategories\fR mode, the column specified by \fB\-\-categorycol\fR is
201 used to categorize data, and a summary text file is generated
202 containing category name and count; an additional text file, containing data for
203 each category, is also generated.
204 .IP "\fB\-o, \-\-overwrite\fR" 4
205 .IX Item "-o, --overwrite"
206 Overwrite existing files.
207 .IP "\fB\-\-outdelim\fR \fIcomma | tab | semicolon\fR" 4
208 .IX Item "--outdelim comma | tab | semicolon"
209 Output text file delimiter. Possible values: \fIcomma, tab, or semicolon\fR.
210 Default value: \fIcomma\fR.
211 .IP "\fB\-q, \-\-quote\fR \fIyes | no\fR" 4
212 .IX Item "-q, --quote yes | no"
213 Put quotes around column values in output text file. Possible values: \fIyes or
214 no\fR. Default value: \fIyes\fR.
215 .IP "\fB\-r, \-\-root\fR \fIrootname\fR" 4
216 .IX Item "-r, --root rootname"
217 New file name is generated using the root: <Root>.<Ext>. Default for new file
218 names: <TextFile>CategoriesSummary.<Ext>, <TextFile>ExtractedColumns.<Ext>, and
219 <TextFile>ExtractedRows.<Ext> for \fIcategories\fR, \fIcolumns\fR, and \fIrows\fR mode
220 respectively. And <TextFile>Category<CategoryName>.<Ext>
221 for each category retrieved from each text file. The output file type determines <Ext>
222 value: csv and tsv for \s-1CSV\s0, and \s-1TSV\s0 files respectively.
223 .Sp
224 This option is ignored for multiple input files.
225 .ie n .IP "\fB\-\-rows\fR \fI""colid,value,criteria..."" | ""colid,value..."" | ""colid,mincolvalue,maxcolvalue"" | ""rownum,rownum,..."" | colid | ""minrownum,maxrownum""\fR" 4
226 .el .IP "\fB\-\-rows\fR \fI``colid,value,criteria...'' | ``colid,value...'' | ``colid,mincolvalue,maxcolvalue'' | ``rownum,rownum,...'' | colid | ``minrownum,maxrownum''\fR" 4
227 .IX Item "--rows colid,value,criteria... | colid,value... | colid,mincolvalue,maxcolvalue | rownum,rownum,... | colid | minrownum,maxrownum"
228 This value is \fB\-\-rowsmode\fR specific. In general, it's a list of comma separated column ids and
229 associated mode specific values. Column ids specification, column label or number, is
230 controlled by \fB\-c, \-\-colmode\fR option.
231 .Sp
232 First line containing column labels is always written out. And value comparisons assume
233 numerical column data.
234 .Sp
235 For \fIrowsbycolvalue\fR mode, input value format contains these triplets:
236 \&\fIcolid,value,criteria...\fR. Possible values for criteria: \fIle, ge or eq\fR.
237 Examples:
238 .Sp
239 .Vb 2
240 \& MolWt,450,le
241 \& MolWt,450,le,LogP,5,le,SumNumNO,10,le,SumNHOH,5,le
242 .Ve
243 .Sp
244 For \fIrowsbycolvaluelist\fR mode, input value format is: \fIcolid,value...\fR. Examples:
245 .Sp
246 .Vb 2
247 \& Mol_ID,20
248 \& Mol_ID,20,1002,1115
249 .Ve
250 .Sp
251 For \fIrowsbycolvaluerange\fR mode, input value format is: \fIcolid,mincolvalue,maxcolvalue\fR. Examples:
252 .Sp
253 .Vb 1
254 \& MolWt,100,450
255 .Ve
256 .Sp
257 For \fIrowbymincolvalue, rowbymaxcolvalue\fR modes, input value format is: \fIcolid\fR.
258 .Sp
259 For \fIrownums\fR mode, input value format is: \fIrownum\fR. Default value: \fI2\fR.
260 .Sp
261 For \fIrownumrange\fR mode, input value format is: \fIminrownum, maxrownum\fR. Examples:
262 .Sp
263 .Vb 1
264 \& 10,40
265 .Ve
266 .IP "\fB\-\-rowsmode\fR \fIrowsbycolvalue | rowsbycolvaluelist | rowsbycolvaluerange | rowbymincolvalue | rowbymaxcolvalue | rownums | rownumrange\fR" 4
267 .IX Item "--rowsmode rowsbycolvalue | rowsbycolvaluelist | rowsbycolvaluerange | rowbymincolvalue | rowbymaxcolvalue | rownums | rownumrange"
268 Specify how to extract rows from \fITextFile(s)\fR. Possible values: \fIrowsbycolvalue, rowsbycolvaluelist, rowsbycolvaluerange,
269 rowbymincolvalue, rowbymaxcolvalue, rownums, rownumrange\fR. Default value: \fIrownums\fR.
270 .Sp
271 Use \fB\-\-rows\fR option to list rows criterion used for extraction of rows from
272 \&\fITextFile(s)\fR.
273 .IP "\fB\-w, \-\-workingdir\fR \fIdirname\fR" 4
274 .IX Item "-w, --workingdir dirname"
275 Location of working directory. Default: current directory.
276 .SH "EXAMPLES"
277 .IX Header "EXAMPLES"
278 To extract first column from a text file and generate a new \s-1CSV\s0 text file NewSample1.csv,
279 type:
280 .PP
281 .Vb 1
282 \& % ExtractFromTextFiles.pl \-r NewSample1 \-o Sample1.csv
283 .Ve
284 .PP
285 To extract columns Mol_ID, MolWeight, and \s-1NAME\s0 from Sample1.csv and generate a new
286 textfile NewSample1.tsv with no quotes, type:
287 .PP
288 .Vb 3
289 \& % ExtractFromTextFiles.pl \-m columns \-c collabel \-\-columns "Mol_ID,
290 \& MolWeight,NAME" \-\-outdelim tab \-\-quote no \-r NewSample1
291 \& \-o Sample1.csv
292 .Ve
293 .PP
294 To extract rows containing values for MolWeight column of less than or equal to 450 from
295 Sample1.csv and generate a new textfile NewSample1.csv, type:
296 .PP
297 .Vb 3
298 \& % ExtractFromTextFiles.pl \-m rows \-\-rowsmode rowsbycolvalue
299 \& \-c collabel \-\-rows MolWeight,450,le \-r NewSample1
300 \& \-o Sample1.csv
301 .Ve
302 .PP
303 To extract rows containing values for MolWeight column between 450 and 500 from
304 Sample1.csv and generate a new textfile NewSample1.csv, type:
305 .PP
306 .Vb 3
307 \& % ExtractFromTextFiles.pl \-m rows \-\-rowsmode rowsbycolvaluerange
308 \& \-c collabel \-\-rows MolWeight,450,500 \-r NewSample1
309 \& \-o Sample1.csv
310 .Ve
311 .PP
312 To extract a row containing minimum value for column MolWeight from Sample1.csv and generate
313 a new textfile NewSample1.csv, type:
314 .PP
315 .Vb 3
316 \& % ExtractFromTextFiles.pl \-m rows \-\-rowsmode rowbymincolvalue
317 \& \-c collabel \-\-rows MolWeight \-r NewSample1
318 \& \-o Sample1.csv
319 .Ve
320 .SH "AUTHOR"
321 .IX Header "AUTHOR"
322 Manish Sud <msud@san.rr.com>
323 .SH "SEE ALSO"
324 .IX Header "SEE ALSO"
325 JoinTextFiles.pl, MergeTextFilesWithSD.pl, ModifyTextFilesFormat.pl, SplitTextFiles.pl
326 .SH "COPYRIGHT"
327 .IX Header "COPYRIGHT"
328 Copyright (C) 2015 Manish Sud. All rights reserved.
329 .PP
330 This file is part of MayaChemTools.
331 .PP
332 MayaChemTools is free software; you can redistribute it and/or modify it under
333 the terms of the \s-1GNU\s0 Lesser General Public License as published by the Free
334 Software Foundation; either version 3 of the License, or (at your option)
335 any later version.