Mercurial > repos > deepakjadmin > mayatool3_test3
diff mayachemtools/docs/scripts/man1/ModifySDFilesDataFields.1 @ 0:73ae111cf86f draft
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 11:55:01 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mayachemtools/docs/scripts/man1/ModifySDFilesDataFields.1 Wed Jan 20 11:55:01 2016 -0500 @@ -0,0 +1,386 @@ +.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.22) +.\" +.\" Standard preamble: +.\" ======================================================================== +.de Sp \" Vertical space (when we can't use .PP) +.if t .sp .5v +.if n .sp +.. +.de Vb \" Begin verbatim text +.ft CW +.nf +.ne \\$1 +.. +.de Ve \" End verbatim text +.ft R +.fi +.. +.\" Set up some character translations and predefined strings. \*(-- will +.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left +.\" double quote, and \*(R" will give a right double quote. \*(C+ will +.\" give a nicer C++. Capital omega is used to do unbreakable dashes and +.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, +.\" nothing in troff, for use with C<>. +.tr \(*W- +.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' +.ie n \{\ +. ds -- \(*W- +. ds PI pi +. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch +. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch +. ds L" "" +. ds R" "" +. ds C` "" +. ds C' "" +'br\} +.el\{\ +. ds -- \|\(em\| +. ds PI \(*p +. ds L" `` +. ds R" '' +'br\} +.\" +.\" Escape single quotes in literal strings from groff's Unicode transform. +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.\" +.\" If the F register is turned on, we'll generate index entries on stderr for +.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index +.\" entries marked with X<> in POD. Of course, you'll have to process the +.\" output yourself in some meaningful fashion. +.ie \nF \{\ +. de IX +. tm Index:\\$1\t\\n%\t"\\$2" +.. +. nr % 0 +. rr F +.\} +.el \{\ +. de IX +.. +.\} +.\" +.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). +.\" Fear. Run. Save yourself. No user-serviceable parts. +. 
\" fudge factors for nroff and troff +.if n \{\ +. ds #H 0 +. ds #V .8m +. ds #F .3m +. ds #[ \f1 +. ds #] \fP +.\} +.if t \{\ +. ds #H ((1u-(\\\\n(.fu%2u))*.13m) +. ds #V .6m +. ds #F 0 +. ds #[ \& +. ds #] \& +.\} +. \" simple accents for nroff and troff +.if n \{\ +. ds ' \& +. ds ` \& +. ds ^ \& +. ds , \& +. ds ~ ~ +. ds / +.\} +.if t \{\ +. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" +. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' +. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' +. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' +. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' +. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' +.\} +. \" troff and (daisy-wheel) nroff accents +.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' +.ds 8 \h'\*(#H'\(*b\h'-\*(#H' +.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] +.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' +.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' +.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] +.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] +.ds ae a\h'-(\w'a'u*4/10)'e +.ds Ae A\h'-(\w'A'u*4/10)'E +. \" corrections for vroff +.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' +.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' +. \" for low resolution devices (crt and lpr) +.if \n(.H>23 .if \n(.V>19 \ +\{\ +. ds : e +. ds 8 ss +. ds o a +. ds d- d\h'-1'\(ga +. ds D- D\h'-1'\(hy +. ds th \o'bp' +. ds Th \o'LP' +. ds ae ae +. ds Ae AE +.\} +.rm #[ #] #H #V #F C +.\" ======================================================================== +.\" +.IX Title "MODIFYSDFILESDATAFIELDS 1" +.TH MODIFYSDFILESDATAFIELDS 1 "2015-03-29" "perl v5.14.2" "MayaChemTools" +.\" For nroff, turn off justification. Always turn off hyphenation; it makes +.\" way too many mistakes in technical documents. 
+.if n .ad l +.nh +.SH "NAME" +ModifySDFilesDataFields.pl \- Modify data fields in SDFile(s) +.SH "SYNOPSIS" +.IX Header "SYNOPSIS" +ModifySDFilesDataFields.pl SDFile(s)... +.PP +ModifySDFilesDataFields.pl [\fB\-d, \-\-detail\fR infolevel] +[\fB\-\-datafieldscommon\fR newfieldlabel, newfieldvalue, [newfieldlabel, newfieldvalue,...]] +[\fB\-\-datafieldsmap\fR newfieldlabel, oldfieldlabel, [oldfieldlabel,...]; [newfieldlabel, oldfieldlabel, [oldfieldlabel,...]]] +[\fB\-\-datafieldsmapfile\fR filename] [\fB\-\-datafieldURL\fR URLDataFieldLabel, CGIScriptPath, CGIParamName, CmpdIDFieldLabel] +[\fB\-h, \-\-help\fR] [\fB\-k, \-\-keepolddatafields\fR all | unmappedonly | none] [\fB\-m, \-\-mode\fR molname | datafields | both] +[\fB\-\-molnamemode\fR datafield | labelprefix] [\fB\-\-molname\fR datafieldname or prefixstring] +[\fB\-\-molnamereplace\fR always | empty] [\fB\-o, \-\-overwrite\fR] [\fB\-r, \-\-root\fR rootname] +[\fB\-w, \-\-workingdir\fR dirname] SDFile(s)... +.SH "DESCRIPTION" +.IX Header "DESCRIPTION" +Modify molname line and data fields in \fISDFile(s)\fR. Molname line can be replaced by a +data field value or assigned a sequential \s-1ID\s0 prefixed with a specific string. For data +fields and modification of their values, these types of options are supported: replace +data field labels by another set of labels; combine values of multiple data fields and +assign a new label; add specific set of data field labels and values to all compound +records; and others. +.PP +The file names are separated by space. The valid file extensions are \fI.sdf\fR and \fI.sd\fR. +All other file names are ignored. All the \s-1SD\s0 files in the current directory can be specified +either by \fI*.sdf\fR or the current directory name. +.SH "OPTIONS" +.IX Header "OPTIONS" +.IP "\fB\-d, \-\-detail\fR \fIinfolevel\fR" 4 +.IX Item "-d, --detail infolevel" +Level of information to print about compound records being ignored. Default: \fI1\fR. Possible +values: \fI1, 2 or 3\fR. 
+.IP "\fB\-\-datafieldscommon\fR \fInewfieldlabel, newfieldvalue, [newfieldlabel, newfieldvalue,...]\fR" 4 +.IX Item "--datafieldscommon newfieldlabel, newfieldvalue, [newfieldlabel, newfieldvalue,...]" +Specify data field labels and values for addition to each compound record. It's a comma delimited +list of data field label and value pairs. Default: \fInone\fR. +.Sp +Examples: +.Sp +.Vb 2 +\& DepositionDate,YYYY\-MM\-DD +\& Source,www.domainname.org,ReleaseDate,YYYY\-MM\-DD +.Ve +.IP "\fB\-\-datafieldsmap\fR \fInewfieldlabel, oldfieldlabel, [oldfieldlabel,...]; [newfieldlabel, oldfieldlabel, [oldfieldlabel,...]]\fR" 4 +.IX Item "--datafieldsmap newfieldlabel, oldfieldlabel, [oldfieldlabel,...]; [newfieldlabel, oldfieldlabel, [oldfieldlabel,...]]" +Specify how various data field labels and values are combined to generate new data field +labels and their values. All the comma delimited data fields, within a semicolon delimited set, +are mapped to the first new data field label along with the data field values joined via new +line character. Default: \fInone\fR. +.Sp +Examples: +.Sp +.Vb 2 +\& Synonym,Name,SystematicName,Synonym;CmpdID,Extreg +\& HBondDonors,SumNHOH +.Ve +.IP "\fB\-\-datafieldsmapfile\fR \fIfilename\fR" 4 +.IX Item "--datafieldsmapfile filename" +Filename containing mapping of data fields. Format of data fields line in this file corresponds +to \fB\-\-datafieldsmap\fR option. Example: +.Sp +.Vb 2 +\& Line 1: Synonym,Name,SystematicName,Synonym;CmpdID,Extreg +\& Line 2: HBondDonors,SumNHOH +.Ve +.IP "\fB\-\-datafieldURL\fR \fIURLDataFieldLabel, CGIScriptPath, CGIParamName, CmpdIDFieldLabel\fR" 4 +.IX Item "--datafieldURL URLDataFieldLabel, CGIScriptPath, CGIParamName, CmpdIDFieldLabel" +Specify how to generate a \s-1URL\s0 for retrieving compound data from a web server and add it +to each compound record. 
\fIURLDataFieldLabel\fR is used as the data field label for \s-1URL\s0 value +which is created by combining \fICGIScriptPath,CGIParamName,CmpdIDFieldLabel\fR values: +CGIScriptPath?CGIParamName=CmpdIDFieldLabelValue. Default: \fInone\fR. +.Sp +Example: +.Sp +.Vb 1 +\& Source,http://www.yourdomain.org/GetCmpd.pl,Reg_ID,Mol_ID +.Ve +.IP "\fB\-h, \-\-help\fR" 4 +.IX Item "-h, --help" +Print this help message. +.IP "\fB\-k, \-\-keepolddatafields\fR \fIall | unmappedonly | none\fR" 4 +.IX Item "-k, --keepolddatafields all | unmappedonly | none" +Specify how to transfer old data fields from input SDFile(s) to new SDFile(s) during +\&\fIdatafields | both\fR value of \fB\-m, \-\-mode\fR option: keep all old data fields; write out the ones +not mapped to new fields as specified by \fB\-\-datafieldsmap\fR or \fB\-\-datafieldsmapfile\fR options; +or ignore all old data field labels. For \fImolname\fR \fB\-m \-\-mode\fR, old datafields are always kept. +Possible values: \fIall | unmappedonly | none\fR. Default: \fInone\fR. +.IP "\fB\-m, \-\-mode\fR \fImolname | datafields | both\fR" 4 +.IX Item "-m, --mode molname | datafields | both" +Specify how to modify SDFile(s): \fImolname\fR \- change molname line by another datafield or value; +\&\fIdatafields\fR \- modify data field labels and values by replacing one label by another, combining +multiple data field labels and values, adding specific set of data field labels and values to all compounds, or +inserting a \s-1URL\s0 for compound retrieval to each record; \fIboth\fR \- change molname line and datafields +simultaneously. Possible values: \fImolname | datafields | both\fR. Default: \fImolname\fR. +.IP "\fB\-\-molnamemode\fR \fIdatafield | labelprefix\fR" 4 +.IX Item "--molnamemode datafield | labelprefix" +Specify how to change molname line for \fB\-m \-\-mode\fR option values of \fImolname | both\fR: use +a datafield label value or assign a sequential \s-1ID\s0 prefixed with \fIlabelprefix\fR. 
Possible values: +\&\fIdatafield | labelprefix\fR. Default: \fIlabelprefix\fR. +.IP "\fB\-\-molname\fR \fIdatafieldname or prefixstring\fR" 4 +.IX Item "--molname datafieldname or prefixstring" +Molname generation method. For \fIdatafield\fR value of \fB\-\-molnamemode\fR option, it corresponds +to datafield label name whose value is used for molname; otherwise, it's a prefix string used for +generating compound IDs like labelprefixstring<Number>. Default value, \fICmpd\fR, generates +compound IDs like Cmpd<Number> for molname. +.IP "\fB\-\-molnamereplace\fR \fIalways | empty\fR" 4 +.IX Item "--molnamereplace always | empty" +Specify when to replace molname line for \fB\-m \-\-mode\fR option values of \fImolname | both\fR: +always replace the molname line using \fB\-\-molname\fR option or only when it's empty. Possible +values: \fIalways | empty\fR. Default: \fIempty\fR. +.IP "\fB\-o, \-\-overwrite\fR" 4 +.IX Item "-o, --overwrite" +Overwrite existing files. +.IP "\fB\-r, \-\-root\fR \fIrootname\fR" 4 +.IX Item "-r, --root rootname" +New \s-1SD\s0 file name is generated using the root: <Root>.<Ext>. Default new file +name: <InitialSDFileName>ModifiedDataFields.<Ext>. This option is ignored for multiple +input files. +.IP "\fB\-w, \-\-workingdir\fR \fIdirname\fR" 4 +.IX Item "-w, --workingdir dirname" +Location of working directory. Default: current directory. 
+.SH "EXAMPLES" +.IX Header "EXAMPLES" +To replace empty molname lines by Cmpd<CmpdNumber> and generate a new \s-1SD\s0 file +NewSample1.sdf, type: +.PP +.Vb 1 +\& % ModifySDFilesDataFields.pl \-o \-r NewSample1 Sample1.sdf +.Ve +.PP +To replace all molname lines by Mol_ID data field and generate a new \s-1SD\s0 file +NewSample1.sdf, type: +.PP +.Vb 2 +\& % ModifySDFilesDataFields.pl \-\-molnamemode datafield +\& \-\-molnamereplace always \-\-molname Mol_ID \-r NewSample1 \-o Sample1.sdf +.Ve +.PP +To replace all molname lines by Mol_ID data field, map Name and CompoundName to +a new datafield Synonym, and generate a new \s-1SD\s0 file NewSample1.sdf, type: +.PP +.Vb 4 +\& % ModifySDFilesDataFields.pl \-\-molnamemode datafield +\& \-\-molnamereplace always \-\-molname Mol_ID \-\-mode both +\& \-\-datafieldsmap "Synonym,Name,CompoundName" \-r +\& NewSample1 \-o Sample1.sdf +.Ve +.PP +To replace all molname lines by Mol_ID data field, map Name and CompoundName to +a new datafield Synonym, add common fields ReleaseDate and Source, and +generate a new \s-1SD\s0 file NewSample1.sdf without keeping any old \s-1SD\s0 data fields, type: +.PP +.Vb 6 +\& % ModifySDFilesDataFields.pl \-\-molnamemode datafield +\& \-\-molnamereplace always \-\-molname Mol_ID \-\-mode both +\& \-\-datafieldsmap "Synonym,Name,CompoundName" +\& \-\-datafieldscommon "ReleaseDate,yyyy\-mm\-dd,Source, +\& www.mayachemtools.org" \-\-keepolddatafields none \-r +\& NewSample1 \-o Sample1.sdf +.Ve +.PP +\&\fBPreparing \s-1SD\s0 files for PubChem deposition:\fR +.PP +Consider an \s-1SD\s0 file with these fields: Mol_ID, Name, Synonyms and Systematic_Name. +And Mol_ID data field uniquely identifies your compound. 
+.PP +To prepare a new \s-1SD\s0 file CmpdDataForPubChem.sdf containing only required +\&\s-1PUBCHEM_EXT_DATASOURCE_REGID\s0 field, type: +.PP +.Vb 4 +\& % ModifySDFilesDataFields.pl \-\-m datafields +\& \-\-datafieldsmap +\& "PUBCHEM_EXT_DATASOURCE_REGID,Mol_ID" +\& \-r CmpdDataForPubChem \-o Sample1.sdf +.Ve +.PP +To prepare a new \s-1SD\s0 file CmpdDataForPubChem.sdf containing only required +\&\s-1PUBCHEM_EXT_DATASOURCE_REGID\s0 field and replace molname line with Mol_ID, type: +.PP +.Vb 5 +\& % ModifySDFilesDataFields.pl \-\-molnamemode datafield +\& \-\-molnamereplace always \-\-molname Mol_ID \-\-mode both +\& \-\-datafieldsmap +\& "PUBCHEM_EXT_DATASOURCE_REGID,Mol_ID" +\& \-r CmpdDataForPubChem \-o Sample1.sdf +.Ve +.PP +In addition to required PubChem data field, you can also add optional PubChem data +fields. +.PP +To map your Name, Synonyms and Systematic_Name data fields to optional +\&\s-1PUBCHEM_SUBSTANCE_SYNONYM\s0 data field along with required \s-1ID\s0 field, type: +.PP +.Vb 6 +\& % ModifySDFilesDataFields.pl \-\-molnamemode datafield +\& \-\-molnamereplace always \-\-molname Mol_ID \-\-mode both +\& \-\-datafieldsmap +\& "PUBCHEM_EXT_DATASOURCE_REGID,Mol_ID; +\& PUBCHEM_SUBSTANCE_SYNONYM,Name,CompoundName" +\& \-r CmpdDataForPubChem \-o Sample1.sdf +.Ve +.PP +To add your <domain.org> as \s-1PUBCHEM_EXT_SUBSTANCE_URL\s0 and link substance +retrieval to your \s-1CGI\s0 script <http://www.yourdomain.org/GetCmpd.pl,Reg_ID,Mol_ID> +via \s-1PUBCHEM_EXT_DATASOURCE_REGID\s0 field along with optional and required +data fields, type: +.PP +.Vb 10 +\& % ModifySDFilesDataFields.pl \-\-molnamemode datafield +\& \-\-molnamereplace always \-\-molname Mol_ID \-\-mode both +\& \-\-datafieldsmap +\& "PUBCHEM_EXT_DATASOURCE_REGID,Mol_ID; +\& PUBCHEM_SUBSTANCE_SYNONYM,Name,CompoundName" +\& \-\-datafieldscommon +\& "PUBCHEM_EXT_SUBSTANCE_URL,domain.org" +\& \-\-datafieldURL "PUBCHEM_EXT_DATASOURCE_URL, +\& http://www.yourdomain.org/GetCmpd.pl,Reg_ID,Mol_ID" +\& \-r 
CmpdDataForPubChem \-o Sample1.sdf +.Ve +.PP +And to add a publication date and request a release date using +\&\s-1PUBCHEM_PUBLICATION_DATE\s0 and \s-1PUBCHEM_DEPOSITOR_RECORD_DATE\s0 data fields +along with all the data fields in earlier examples, type: +.PP +.Vb 12 +\& % ModifySDFilesDataFields.pl \-\-molnamemode datafield +\& \-\-molnamereplace always \-\-molname Mol_ID \-\-mode both +\& \-\-datafieldsmap +\& "PUBCHEM_EXT_DATASOURCE_REGID,Mol_ID; +\& PUBCHEM_SUBSTANCE_SYNONYM,Name,CompoundName" +\& \-\-datafieldURL "PUBCHEM_EXT_DATASOURCE_URL, +\& http://www.yourdomain.org/GetCmpd.pl,Reg_ID,Mol_ID" +\& \-\-datafieldscommon +\& "PUBCHEM_EXT_SUBSTANCE_URL,domain.org, +\& PUBCHEM_PUBLICATION_DATE,YYYY\-MM\-DD, +\& PUBCHEM_DEPOSITOR_RECORD_DATE,YYYY\-MM\-DD" +\& \-r CmpdDataForPubChem \-o Sample1.sdf +.Ve +.SH "AUTHOR" +.IX Header "AUTHOR" +Manish Sud <msud@san.rr.com> +.SH "SEE ALSO" +.IX Header "SEE ALSO" +InfoSDFiles.pl, JoinSDFiles.pl, MergeTextFilesWithSD.pl, SplitSDFiles.pl, SDFilesToHTML.pl +.SH "COPYRIGHT" +.IX Header "COPYRIGHT" +Copyright (C) 2015 Manish Sud. All rights reserved. +.PP +This file is part of MayaChemTools. +.PP +MayaChemTools is free software; you can redistribute it and/or modify it under +the terms of the \s-1GNU\s0 Lesser General Public License as published by the Free +Software Foundation; either version 3 of the License, or (at your option) +any later version.