Mercurial > repos > deepakjadmin > mayatool3_test3
diff mayachemtools/docs/scripts/man1/ExtractFromTextFiles.1 @ 0:73ae111cf86f draft
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 11:55:01 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mayachemtools/docs/scripts/man1/ExtractFromTextFiles.1 Wed Jan 20 11:55:01 2016 -0500 @@ -0,0 +1,335 @@ +.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.22) +.\" +.\" Standard preamble: +.\" ======================================================================== +.de Sp \" Vertical space (when we can't use .PP) +.if t .sp .5v +.if n .sp +.. +.de Vb \" Begin verbatim text +.ft CW +.nf +.ne \\$1 +.. +.de Ve \" End verbatim text +.ft R +.fi +.. +.\" Set up some character translations and predefined strings. \*(-- will +.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left +.\" double quote, and \*(R" will give a right double quote. \*(C+ will +.\" give a nicer C++. Capital omega is used to do unbreakable dashes and +.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, +.\" nothing in troff, for use with C<>. +.tr \(*W- +.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' +.ie n \{\ +. ds -- \(*W- +. ds PI pi +. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch +. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch +. ds L" "" +. ds R" "" +. ds C` "" +. ds C' "" +'br\} +.el\{\ +. ds -- \|\(em\| +. ds PI \(*p +. ds L" `` +. ds R" '' +'br\} +.\" +.\" Escape single quotes in literal strings from groff's Unicode transform. +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.\" +.\" If the F register is turned on, we'll generate index entries on stderr for +.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index +.\" entries marked with X<> in POD. Of course, you'll have to process the +.\" output yourself in some meaningful fashion. +.ie \nF \{\ +. de IX +. tm Index:\\$1\t\\n%\t"\\$2" +.. +. nr % 0 +. rr F +.\} +.el \{\ +. de IX +.. +.\} +.\" +.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). +.\" Fear. Run. Save yourself. No user-serviceable parts. +. 
\" fudge factors for nroff and troff +.if n \{\ +. ds #H 0 +. ds #V .8m +. ds #F .3m +. ds #[ \f1 +. ds #] \fP +.\} +.if t \{\ +. ds #H ((1u-(\\\\n(.fu%2u))*.13m) +. ds #V .6m +. ds #F 0 +. ds #[ \& +. ds #] \& +.\} +. \" simple accents for nroff and troff +.if n \{\ +. ds ' \& +. ds ` \& +. ds ^ \& +. ds , \& +. ds ~ ~ +. ds / +.\} +.if t \{\ +. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" +. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' +. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' +. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' +. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' +. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' +.\} +. \" troff and (daisy-wheel) nroff accents +.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' +.ds 8 \h'\*(#H'\(*b\h'-\*(#H' +.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] +.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' +.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' +.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] +.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] +.ds ae a\h'-(\w'a'u*4/10)'e +.ds Ae A\h'-(\w'A'u*4/10)'E +. \" corrections for vroff +.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' +.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' +. \" for low resolution devices (crt and lpr) +.if \n(.H>23 .if \n(.V>19 \ +\{\ +. ds : e +. ds 8 ss +. ds o a +. ds d- d\h'-1'\(ga +. ds D- D\h'-1'\(hy +. ds th \o'bp' +. ds Th \o'LP' +. ds ae ae +. ds Ae AE +.\} +.rm #[ #] #H #V #F C +.\" ======================================================================== +.\" +.IX Title "EXTRACTFROMTEXTFILES 1" +.TH EXTRACTFROMTEXTFILES 1 "2015-03-29" "perl v5.14.2" "MayaChemTools" +.\" For nroff, turn off justification. Always turn off hyphenation; it makes +.\" way too many mistakes in technical documents. 
+.if n .ad l +.nh +.SH "NAME" +ExtractFromTextFiles.pl \- Extract specific data from TextFile(s) +.SH "SYNOPSIS" +.IX Header "SYNOPSIS" +ExtractFromTextFiles.pl TextFile(s)... +.PP +ExtractFromTextFiles.pl [\fB\-c, \-\-colmode\fR colnum | collabel] [\fB\-\-categorycol \fR number | string] +[\fB\-\-columns\fR \*(L"colnum,[colnum]...\*(R" | \*(L"collabel,[collabel]...\*(R"] [\fB\-h, \-\-help\fR] +[\fB\-\-indelim\fR \fIcomma | semicolon\fR] [\fB\-m, \-\-mode \fR \fIcolumns | rows | categories\fR] +[\fB\-o, \-\-overwrite\fR] [\fB\-\-outdelim\fR \fIcomma | tab | semicolon\fR] [\fB\-q, \-\-quote\fR \fIyes | no\fR] +[\fB\-\-rows\fR \*(L"colid,value,criteria...\*(R" | \*(L"colid,value...\*(R" | \*(L"colid,mincolvalue,maxcolvalue\*(R" | \*(L"rownum,rownum,...\*(R" | colid | \*(L"minrownum,maxrownum\*(R"] +[ \fB\-\-rowsmode\fR rowsbycolvalue | rowsbycolvaluelist | rowsbycolvaluerange | rowbymincolvalue | rowbymaxcolvalue | rownums | rownumrange] +[\fB\-r, \-\-root\fR \fIrootname\fR] [\fB\-w, \-\-workingdir\fR \fIdirname\fR] TextFile(s)... +.SH "DESCRIPTION" +.IX Header "DESCRIPTION" +Extract column(s)/row(s) data from \fITextFile(s)\fR identified by column numbers or labels. Or categorize +data using a specified column category. During categorization, a summary text file is +generated containing category name and count; an additional text file, containing data +for each category, is also generated. The file names are separated by space. The +valid file extensions are \fI.csv\fR and \fI.tsv\fR for comma/semicolon and tab delimited +text files respectively. All other file names are ignored. All the text files in a +current directory can be specified by \fI*.csv\fR, \fI*.tsv\fR, or the current directory +name. The \fB\-\-indelim\fR option determines the format of \fITextFile(s)\fR. Any file +which doesn't correspond to the format indicated by \fB\-\-indelim\fR option is ignored.
+.SH "OPTIONS" +.IX Header "OPTIONS" +.IP "\fB\-c, \-\-colmode\fR \fIcolnum | collabel\fR" 4 +.IX Item "-c, --colmode colnum | collabel" +Specify how columns are identified in \fITextFile(s)\fR: using column number or column +label. Possible values: \fIcolnum or collabel\fR. Default value: \fIcolnum\fR. +.IP "\fB\-\-categorycol \fR \fInumber | string\fR" 4 +.IX Item "--categorycol number | string" +Column used to categorize data. Default value: First column. +.Sp +For \fIcolnum\fR value of \fB\-c, \-\-colmode\fR option, input value is a column number. +Example: \fI1\fR. +.Sp +For \fIcollabel\fR value of \fB\-c, \-\-colmode\fR option, input value is a column label. +Example: \fIMol_ID\fR. +.ie n .IP "\fB\-\-columns\fR \fI""colnum,[colnum]..."" | ""collabel,[collabel]...""\fR" 4 +.el .IP "\fB\-\-columns\fR \fI``colnum,[colnum]...'' | ``collabel,[collabel]...''\fR" 4 +.IX Item "--columns colnum,[colnum]... | collabel,[collabel]..." +List of comma delimited columns to extract. Default value: First column. +.Sp +For \fIcolnum\fR value of \fB\-c, \-\-colmode\fR option, input values format is: +\&\fIcolnum,colnum,...\fR. Example: \fI1,3,5\fR +.Sp +For \fIcollabel\fR value of \fB\-c, \-\-colmode\fR option, input values format is: +\&\fIcollabel,collabel,..\fR. Example: \fIMol_ID,MolWeight\fR +.IP "\fB\-h, \-\-help\fR" 4 +.IX Item "-h, --help" +Print this help message. +.IP "\fB\-\-indelim\fR \fIcomma | semicolon\fR" 4 +.IX Item "--indelim comma | semicolon" +Input delimiter for \s-1CSV\s0 \fITextFile(s)\fR. Possible values: \fIcomma or semicolon\fR. +Default value: \fIcomma\fR. For \s-1TSV\s0 files, this option is ignored and \fItab\fR is used as a +delimiter. +.IP "\fB\-m, \-\-mode \fR \fIcolumns | rows | categories\fR" 4 +.IX Item "-m, --mode columns | rows | categories" +Specify what to extract from \fITextFile(s)\fR. Possible values: \fIcolumns, rows, +or categories\fR. Default value: \fIcolumns\fR. 
+.Sp +For \fIcolumns\fR mode, data for appropriate columns specified by \fB\-\-columns\fR option +is extracted from \fITextFile(s)\fR and placed into new text files. +.Sp +For \fIrows\fR mode, appropriate rows specified in conjunction with \fB\-\-rowsmode\fR and +\&\fB\-\-rows\fR options are extracted from \fITextFile(s)\fR and placed into new text files. +.Sp +For \fIcategories\fR mode, column specified by \fB\-\-categorycol\fR is +used to categorize data, and a summary text file is generated +containing category name and count; an additional text file, containing data +for each category, is also generated. +.IP "\fB\-o, \-\-overwrite\fR" 4 +.IX Item "-o, --overwrite" +Overwrite existing files. +.IP "\fB\-\-outdelim\fR \fIcomma | tab | semicolon\fR." 4 +.IX Item "--outdelim comma | tab | semicolon." +Output text file delimiter. Possible values: \fIcomma, tab, or semicolon\fR. +Default value: \fIcomma\fR. +.IP "\fB\-q, \-\-quote\fR \fIyes | no\fR" 4 +.IX Item "-q, --quote yes | no" +Put quotes around column values in output text file. Possible values: \fIyes or +no\fR. Default value: \fIyes\fR. +.IP "\fB\-r, \-\-root\fR \fIrootname\fR" 4 +.IX Item "-r, --root rootname" +New file name is generated using the root: <Root>.<Ext>. Default for new file +names: <TextFile>CategoriesSummary.<Ext>, <TextFile>ExtractedColumns.<Ext>, and +<TextFile>ExtractedRows.<Ext> for \fIcategories\fR, \fIcolumns\fR, and \fIrows\fR mode +respectively. And <TextFile>Category<CategoryName>.<Ext> +for each category retrieved from each text file. The output file type determines <Ext> +value: csv and tsv for \s-1CSV\s0, and \s-1TSV\s0 files respectively. +.Sp +This option is ignored for multiple input files.
+.ie n .IP "\fB\-\-rows\fR \fI""colid,value,criteria..."" | ""colid,value..."" | ""colid,mincolvalue,maxcolvalue"" | ""rownum,rownum,..."" | colid | ""minrownum,maxrownum""\fR" 4 +.el .IP "\fB\-\-rows\fR \fI``colid,value,criteria...'' | ``colid,value...'' | ``colid,mincolvalue,maxcolvalue'' | ``rownum,rownum,...'' | colid | ``minrownum,maxrownum''\fR" 4 +.IX Item "--rows colid,value,criteria... | colid,value... | colid,mincolvalue,maxcolvalue | rownum,rownum,... | colid | minrownum,maxrownum" +This value is \fB\-\-rowsmode\fR specific. In general, it's a list of comma separated column ids and +associated mode specific values. Column id specification, by column label or number, is +controlled by \fB\-c, \-\-colmode\fR option. +.Sp +First line containing column labels is always written out. And value comparisons assume +numerical column data. +.Sp +For \fIrowsbycolvalue\fR mode, input value format contains these triplets: +\&\fIcolid,value, criteria...\fR. Possible values for criteria: \fIle, ge or eq\fR. +Examples: +.Sp +.Vb 2 +\& MolWt,450,le +\& MolWt,450,le,LogP,5,le,SumNumNO,10,le,SumNHOH,5,le +.Ve +.Sp +For \fIrowsbycolvaluelist\fR mode, input value format is: \fIcolid,value...\fR. Examples: +.Sp +.Vb 2 +\& Mol_ID,20 +\& Mol_ID,20,1002,1115 +.Ve +.Sp +For \fIrowsbycolvaluerange\fR mode, input value format is: \fIcolid,mincolvalue,maxcolvalue\fR. Examples: +.Sp +.Vb 1 +\& MolWt,100,450 +.Ve +.Sp +For \fIrowbymincolvalue, rowbymaxcolvalue\fR modes, input value format is: \fIcolid\fR. +.Sp +For \fIrownum\fR mode, input value format is: \fIrownum\fR. Default value: \fI2\fR. +.Sp +For \fIrownumrange\fR mode, input value format is: \fIminrownum, maxrownum\fR.
Examples: +.Sp +.Vb 1 +\& 10,40 +.Ve +.IP "\fB\-\-rowsmode\fR \fIrowsbycolvalue | rowsbycolvaluelist | rowsbycolvaluerange | rowbymincolvalue | rowbymaxcolvalue | rownums | rownumrange\fR" 4 +.IX Item "--rowsmode rowsbycolvalue | rowsbycolvaluelist | rowsbycolvaluerange | rowbymincolvalue | rowbymaxcolvalue | rownums | rownumrange" +Specify how to extract rows from \fITextFile(s)\fR. Possible values: \fIrowsbycolvalue, rowsbycolvaluelist, rowsbycolvaluerange, +rowbymincolvalue, rowbymaxcolvalue, rownum, rownumrange\fR. Default value: \fIrownum\fR. +.Sp +Use \fB\-\-rows\fR option to list row criteria used for extraction of rows from +\&\fITextFile(s)\fR. +.IP "\fB\-w, \-\-workingdir\fR \fIdirname\fR" 4 +.IX Item "-w, --workingdir dirname" +Location of working directory. Default: current directory. +.SH "EXAMPLES" +.IX Header "EXAMPLES" +To extract first column from a text file and generate a new \s-1CSV\s0 text file NewSample1.csv, +type: +.PP +.Vb 1 +\& % ExtractFromTextFiles.pl \-r NewSample1 \-o Sample1.csv +.Ve +.PP +To extract columns Mol_ID, MolWeight, and \s-1NAME\s0 from Sample1.csv and generate a new +textfile NewSample1.tsv with no quotes, type: +.PP +.Vb 3 +\& % ExtractFromTextFiles.pl \-m columns \-c collabel \-\-columns "Mol_ID, +\& MolWeight,NAME" \-\-outdelim tab \-\-quote no \-r NewSample1 +\& \-o Sample1.csv +.Ve +.PP +To extract rows containing values for MolWeight column of less than 450 from +Sample1.csv and generate a new textfile NewSample1.csv, type: +.PP +.Vb 3 +\& % ExtractFromTextFiles.pl \-m rows \-\-rowsmode rowsbycolvalue +\& \-c collabel \-\-rows MolWeight,450,le \-r NewSample1 +\& \-o Sample1.csv +.Ve +.PP +To extract rows containing values for MolWeight column between 450 and 500 from +Sample1.csv and generate a new textfile NewSample1.csv, type: +.PP +.Vb 3 +\& % ExtractFromTextFiles.pl \-m rows \-\-rowsmode rowsbycolvaluerange +\& \-c collabel \-\-rows MolWeight,450,500 \-r NewSample1 +\& \-o Sample1.csv +.Ve +.PP +To extract a
row containing minimum value for column MolWeight from Sample1.csv and generate +a new textfile NewSample1.csv, type: +.PP +.Vb 3 +\& % ExtractFromTextFiles.pl \-m rows \-\-rowsmode rowbymincolvalue +\& \-c collabel \-\-rows MolWeight \-r NewSample1 +\& \-o Sample1.csv +.Ve +.SH "AUTHOR" +.IX Header "AUTHOR" +Manish Sud <msud@san.rr.com> +.SH "SEE ALSO" +.IX Header "SEE ALSO" +JoinTextFiles.pl, MergeTextFilesWithSD.pl, ModifyTextFilesFormat.pl, SplitTextFiles.pl +.SH "COPYRIGHT" +.IX Header "COPYRIGHT" +Copyright (C) 2015 Manish Sud. All rights reserved. +.PP +This file is part of MayaChemTools. +.PP +MayaChemTools is free software; you can redistribute it and/or modify it under +the terms of the \s-1GNU\s0 Lesser General Public License as published by the Free +Software Foundation; either version 3 of the License, or (at your option) +any later version.