comparison mayachemtools/docs/scripts/man1/ModifySDFilesDataFields.1 @ 0:73ae111cf86f draft

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 11:55:01 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:73ae111cf86f
1 .\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.22)
2 .\"
3 .\" Standard preamble:
4 .\" ========================================================================
5 .de Sp \" Vertical space (when we can't use .PP)
6 .if t .sp .5v
7 .if n .sp
8 ..
9 .de Vb \" Begin verbatim text
10 .ft CW
11 .nf
12 .ne \\$1
13 ..
14 .de Ve \" End verbatim text
15 .ft R
16 .fi
17 ..
18 .\" Set up some character translations and predefined strings. \*(-- will
19 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
20 .\" double quote, and \*(R" will give a right double quote. \*(C+ will
21 .\" give a nicer C++. Capital omega is used to do unbreakable dashes and
22 .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
23 .\" nothing in troff, for use with C<>.
24 .tr \(*W-
25 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
26 .ie n \{\
27 . ds -- \(*W-
28 . ds PI pi
29 . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
30 . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
31 . ds L" ""
32 . ds R" ""
33 . ds C` ""
34 . ds C' ""
35 'br\}
36 .el\{\
37 . ds -- \|\(em\|
38 . ds PI \(*p
39 . ds L" ``
40 . ds R" ''
41 'br\}
42 .\"
43 .\" Escape single quotes in literal strings from groff's Unicode transform.
44 .ie \n(.g .ds Aq \(aq
45 .el .ds Aq '
46 .\"
47 .\" If the F register is turned on, we'll generate index entries on stderr for
48 .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
49 .\" entries marked with X<> in POD. Of course, you'll have to process the
50 .\" output yourself in some meaningful fashion.
51 .ie \nF \{\
52 . de IX
53 . tm Index:\\$1\t\\n%\t"\\$2"
54 ..
55 . nr % 0
56 . rr F
57 .\}
58 .el \{\
59 . de IX
60 ..
61 .\}
62 .\"
63 .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
64 .\" Fear. Run. Save yourself. No user-serviceable parts.
65 . \" fudge factors for nroff and troff
66 .if n \{\
67 . ds #H 0
68 . ds #V .8m
69 . ds #F .3m
70 . ds #[ \f1
71 . ds #] \fP
72 .\}
73 .if t \{\
74 . ds #H ((1u-(\\\\n(.fu%2u))*.13m)
75 . ds #V .6m
76 . ds #F 0
77 . ds #[ \&
78 . ds #] \&
79 .\}
80 . \" simple accents for nroff and troff
81 .if n \{\
82 . ds ' \&
83 . ds ` \&
84 . ds ^ \&
85 . ds , \&
86 . ds ~ ~
87 . ds /
88 .\}
89 .if t \{\
90 . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
91 . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
92 . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
93 . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
94 . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
95 . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
96 .\}
97 . \" troff and (daisy-wheel) nroff accents
98 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
99 .ds 8 \h'\*(#H'\(*b\h'-\*(#H'
100 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
101 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
102 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
103 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
104 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
105 .ds ae a\h'-(\w'a'u*4/10)'e
106 .ds Ae A\h'-(\w'A'u*4/10)'E
107 . \" corrections for vroff
108 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
109 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
110 . \" for low resolution devices (crt and lpr)
111 .if \n(.H>23 .if \n(.V>19 \
112 \{\
113 . ds : e
114 . ds 8 ss
115 . ds o a
116 . ds d- d\h'-1'\(ga
117 . ds D- D\h'-1'\(hy
118 . ds th \o'bp'
119 . ds Th \o'LP'
120 . ds ae ae
121 . ds Ae AE
122 .\}
123 .rm #[ #] #H #V #F C
124 .\" ========================================================================
125 .\"
126 .IX Title "MODIFYSDFILESDATAFIELDS 1"
127 .TH MODIFYSDFILESDATAFIELDS 1 "2015-03-29" "perl v5.14.2" "MayaChemTools"
128 .\" For nroff, turn off justification. Always turn off hyphenation; it makes
129 .\" way too many mistakes in technical documents.
130 .if n .ad l
131 .nh
132 .SH "NAME"
133 ModifySDFilesDataFields.pl \- Modify data fields in SDFile(s)
134 .SH "SYNOPSIS"
135 .IX Header "SYNOPSIS"
136 ModifySDFilesDataFields.pl SDFile(s)...
137 .PP
138 ModifySDFilesDataFields.pl [\fB\-d, \-\-detail\fR infolevel]
139 [\fB\-\-datafieldscommon\fR newfieldlabel, newfieldvalue, [newfieldlabel, newfieldvalue,...]]
140 [\fB\-\-datafieldsmap\fR newfieldlabel, oldfieldlabel, [oldfieldlabel,...]; [newfieldlabel, oldfieldlabel, [oldfieldlabel,...]]]
141 [\fB\-\-datafieldsmapfile\fR filename] [\fB\-\-datafieldURL\fR URLDataFieldLabel, CGIScriptPath, CGIParamName, CmpdIDFieldLabel]
142 [\fB\-h, \-\-help\fR] [\fB\-k, \-\-keepolddatafields\fR all | unmappedonly | none] [\fB\-m, \-\-mode\fR molname | datafields | both]
143 [\fB\-\-molnamemode\fR datafield | labelprefix] [\fB\-\-molname\fR datafieldname or prefixstring]
144 [\fB\-\-molnamereplace\fR always | empty] [\fB\-o, \-\-overwrite\fR] [\fB\-r, \-\-root\fR rootname]
145 [\fB\-w, \-\-workingdir\fR dirname] SDFile(s)...
146 .SH "DESCRIPTION"
147 .IX Header "DESCRIPTION"
148 Modify molname line and data fields in \fISDFile(s)\fR. Molname line can be replaced by a
149 data field value or assigned a sequential \s-1ID\s0 prefixed with a specific string. For data
150 fields and modification of their values, these types of options are supported: replace
151 data field labels by another set of labels; combine values of multiple data fields and
152 assign a new label; add specific set of data field labels and values to all compound
153 records; and others.
154 .PP
155 The file names are separated by spaces. The valid file extensions are \fI.sdf\fR and \fI.sd\fR.
156 All other file names are ignored. All the \s-1SD\s0 files in a current directory can be specified
157 either by \fI*.sdf\fR or the current directory name.
158 .SH "OPTIONS"
159 .IX Header "OPTIONS"
160 .IP "\fB\-d, \-\-detail\fR \fIinfolevel\fR" 4
161 .IX Item "-d, --detail infolevel"
162 Level of information to print about compound records being ignored. Default: \fI1\fR. Possible
163 values: \fI1, 2 or 3\fR.
164 .IP "\fB\-\-datafieldscommon\fR \fInewfieldlabel, newfieldvalue, [newfieldlabel, newfieldvalue,...]\fR" 4
165 .IX Item "--datafieldscommon newfieldlabel, newfieldvalue, [newfieldlabel, newfieldvalue,...]"
166 Specify data field labels and values for addition to each compound record. It's a comma delimited
167 list of data field label and value pairs. Default: \fInone\fR.
168 .Sp
169 Examples:
170 .Sp
171 .Vb 2
172 \& DepositionDate,YYYY\-MM\-DD
173 \& Source,www.domainname.org,ReleaseDate,YYYY\-MM\-DD
174 .Ve
175 .IP "\fB\-\-datafieldsmap\fR \fInewfieldlabel, oldfieldlabel, [oldfieldlabel,...]; [newfieldlabel, oldfieldlabel, [oldfieldlabel,...]]\fR" 4
176 .IX Item "--datafieldsmap newfieldlabel, oldfieldlabel, [oldfieldlabel,...]; [newfieldlabel, oldfieldlabel, [oldfieldlabel,...]]"
177 Specify how various data field labels and values are combined to generate new data field
178 labels and their values. All the comma delimited data fields, within a semicolon delimited set,
179 are mapped to the first new data field label along with the data field values joined via new
180 line character. Default: \fInone\fR.
181 .Sp
182 Examples:
183 .Sp
184 .Vb 2
185 \& Synonym,Name,SystematicName,Synonym;CmpdID,Extreg
186 \& HBondDonors,SumNHOH
187 .Ve
188 .IP "\fB\-\-datafieldsmapfile\fR \fIfilename\fR" 4
189 .IX Item "--datafieldsmapfile filename"
190 Filename containing mapping of data fields. Format of data fields line in this file corresponds
191 to \fB\-\-datafieldsmap\fR option. Example:
192 .Sp
193 .Vb 2
194 \& Line 1: Synonym,Name,SystematicName,Synonym;CmpdID,Extreg
195 \& Line 2: HBondDonors,SumNHOH
196 .Ve
197 .IP "\fB\-\-datafieldURL\fR \fIURLDataFieldLabel, CGIScriptPath, CGIParamName, CmpdIDFieldLabel\fR" 4
198 .IX Item "--datafieldURL URLDataFieldLabel, CGIScriptPath, CGIParamName, CmpdIDFieldLabel"
199 Specify how to generate a \s-1URL\s0 for retrieving compound data from a web server and add it
200 to each compound record. \fIURLDataFieldLabel\fR is used as the data field label for \s-1URL\s0 value
201 which is created by combining \fICGIScriptPath,CGIParamName,CmpdIDFieldLabel\fR values:
202 CGIScriptPath?CGIParamName=CmpdIDFieldLabelValue. Default: \fInone\fR.
203 .Sp
204 Example:
205 .Sp
206 .Vb 1
207 \& Source,http://www.yourdomain.org/GetCmpd.pl,Reg_ID,Mol_ID
208 .Ve
209 .IP "\fB\-h, \-\-help\fR" 4
210 .IX Item "-h, --help"
211 Print this help message.
212 .IP "\fB\-k, \-\-keepolddatafields\fR \fIall | unmappedonly | none\fR" 4
213 .IX Item "-k, --keepolddatafields all | unmappedonly | none"
214 Specify how to transfer old data fields from input SDFile(s) to new SDFile(s) during
215 \&\fIdatafields | both\fR value of \fB\-m, \-\-mode\fR option: keep all old data fields; write out the ones
216 not mapped to new fields as specified by \fB\-\-datafieldsmap\fR or \fB\-\-datafieldsmapfile\fR options;
217 or ignore all old data field labels. For \fImolname\fR \fB\-m \-\-mode\fR, old datafields are always kept.
218 Possible values: \fIall | unmappedonly | none\fR. Default: \fInone\fR.
219 .IP "\fB\-m, \-\-mode\fR \fImolname | datafields | both\fR" 4
220 .IX Item "-m, --mode molname | datafields | both"
221 Specify how to modify SDFile(s): \fImolname\fR \- change molname line by another datafield or value;
222 \&\fIdatafields\fR \- modify data field labels and values by replacing one label by another, combining
223 multiple data field labels and values, adding specific set of data field labels and values to all compound records, or
224 inserting a \s-1URL\s0 for compound retrieval to each record; \fIboth\fR \- change molname line and datafields
225 simultaneously. Possible values: \fImolname | datafields | both\fR. Default: \fImolname\fR.
226 .IP "\fB\-\-molnamemode\fR \fIdatafield | labelprefix\fR" 4
227 .IX Item "--molnamemode datafield | labelprefix"
228 Specify how to change molname line for \fB\-m \-\-mode\fR option values of \fImolname | both\fR: use
229 a datafield label value or assign a sequential \s-1ID\s0 prefixed with \fIlabelprefix\fR. Possible values:
230 \&\fIdatafield | labelprefix\fR. Default: \fIlabelprefix\fR.
231 .IP "\fB\-\-molname\fR \fIdatafieldname or prefixstring\fR" 4
232 .IX Item "--molname datafieldname or prefixstring"
233 Molname generation method. For \fIdatafield\fR value of \fB\-\-molnamemode\fR option, it corresponds
234 to datafield label name whose value is used for molname; otherwise, it's a prefix string used for
235 generating compound IDs like labelprefixstring<Number>. Default value, \fICmpd\fR, generates
236 compound IDs like Cmpd<Number> for molname.
237 .IP "\fB\-\-molnamereplace\fR \fIalways | empty\fR" 4
238 .IX Item "--molnamereplace always | empty"
239 Specify when to replace molname line for \fB\-m \-\-mode\fR option values of \fImolname | both\fR:
240 always replace the molname line using \fB\-\-molname\fR option or only when it's empty. Possible
241 values: \fIalways | empty\fR. Default: \fIempty\fR.
242 .IP "\fB\-o, \-\-overwrite\fR" 4
243 .IX Item "-o, --overwrite"
244 Overwrite existing files.
245 .IP "\fB\-r, \-\-root\fR \fIrootname\fR" 4
246 .IX Item "-r, --root rootname"
247 New \s-1SD\s0 file name is generated using the root: <Root>.<Ext>. Default new file
248 name: <InitialSDFileName>ModifiedDataFields.<Ext>. This option is ignored for multiple
249 input files.
250 .IP "\fB\-w, \-\-workingdir\fR \fIdirname\fR" 4
251 .IX Item "-w, --workingdir dirname"
252 Location of working directory. Default: current directory.
253 .SH "EXAMPLES"
254 .IX Header "EXAMPLES"
255 To replace empty molname lines by Cmpd<CmpdNumber> and generate a new \s-1SD\s0 file
256 NewSample1.sdf, type:
257 .PP
258 .Vb 1
259 \& % ModifySDFilesDataFields.pl \-o \-r NewSample1 Sample1.sdf
260 .Ve
261 .PP
262 To replace all molname lines by Mol_ID data field and generate a new \s-1SD\s0 file
263 NewSample1.sdf, type:
264 .PP
265 .Vb 2
266 \& % ModifySDFilesDataFields.pl \-\-molnamemode datafield
267 \& \-\-molnamereplace always \-r NewSample1 \-o Sample1.sdf
268 .Ve
269 .PP
270 To replace all molname lines by Mol_ID data field, map Name and CompoundName to
271 a new datafield Synonym, and generate a new \s-1SD\s0 file NewSample1.sdf, type:
272 .PP
273 .Vb 4
274 \& % ModifySDFilesDataFields.pl \-\-molnamemode datafield
275 \& \-\-molnamereplace always \-\-molname Mol_ID \-\-mode both
276 \& \-\-datafieldsmap "Synonym,Name,CompoundName" \-r
277 \& NewSample1 \-o Sample1.sdf
278 .Ve
279 .PP
280 To replace all molname lines by Mol_ID data field, map Name and CompoundName to
281 a new datafield Synonym, add common fields ReleaseDate and Source, and
282 generate a new \s-1SD\s0 file NewSample1.sdf without keeping any old \s-1SD\s0 data fields, type:
283 .PP
284 .Vb 6
285 \& % ModifySDFilesDataFields.pl \-\-molnamemode datafield
286 \& \-\-molnamereplace always \-\-molname Mol_ID \-\-mode both
287 \& \-\-datafieldsmap "Synonym,Name,CompoundName"
288 \& \-\-datafieldscommon "ReleaseDate,yyyy\-mm\-dd,Source,
289 \& www.mayachemtools.org" \-\-keepolddatafields none \-r
290 \& NewSample1 \-o Sample1.sdf
291 .Ve
292 .PP
293 \&\fBPreparing \s-1SD\s0 files for PubChem deposition:\fR
294 .PP
295 Consider an \s-1SD\s0 file with these fields: Mol_ID, Name, Synonyms and Systematic_Name.
296 The Mol_ID data field uniquely identifies each compound.
297 .PP
298 To prepare a new \s-1SD\s0 file CmpdDataForPubChem.sdf containing only required
299 \&\s-1PUBCHEM_EXT_DATASOURCE_REGID\s0 field, type:
300 .PP
301 .Vb 4
302 \& % ModifySDFilesDataFields.pl \-\-m datafields
303 \& \-\-datafieldsmap
304 \& "PUBCHEM_EXT_DATASOURCE_REGID,Mol_ID"
305 \& \-r CmpdDataForPubChem \-o Sample1.sdf
306 .Ve
307 .PP
308 To prepare a new \s-1SD\s0 file CmpdDataForPubChem.sdf containing only required
309 \&\s-1PUBCHEM_EXT_DATASOURCE_REGID\s0 field and replace molname line with Mol_ID, type:
310 .PP
311 .Vb 5
312 \& % ModifySDFilesDataFields.pl \-\-molnamemode datafield
313 \& \-\-molnamereplace always \-\-molname Mol_ID \-\-mode both
314 \& \-\-datafieldsmap
315 \& "PUBCHEM_EXT_DATASOURCE_REGID,Mol_ID"
316 \& \-r CmpdDataForPubChem \-o Sample1.sdf
317 .Ve
318 .PP
319 In addition to required PubChem data field, you can also add optional PubChem data
320 fields.
321 .PP
322 To map your Name, Synonyms and Systematic_Name data fields to optional
323 \&\s-1PUBCHEM_SUBSTANCE_SYNONYM\s0 data field along with required \s-1ID\s0 field, type:
324 .PP
325 .Vb 6
326 \& % ModifySDFilesDataFields.pl \-\-molnamemode datafield
327 \& \-\-molnamereplace always \-\-molname Mol_ID \-\-mode both
328 \& \-\-datafieldsmap
329 \& "PUBCHEM_EXT_DATASOURCE_REGID,Mol_ID;
330 \& PUBCHEM_SUBSTANCE_SYNONYM,Name,CompoundName"
331 \& \-r CmpdDataForPubChem \-o Sample1.sdf
332 .Ve
333 .PP
334 To add your <domain.org> as \s-1PUBCHEM_EXT_SUBSTANCE_URL\s0 and link substance
335 retrieval to your \s-1CGI\s0 script <http://www.yourdomain.org/GetCmpd.pl,Reg_ID,Mol_ID>
336 via \s-1PUBCHEM_EXT_DATASOURCE_REGID\s0 field along with optional and required
337 data fields, type:
338 .PP
339 .Vb 10
340 \& % ModifySDFilesDataFields.pl \-\-molnamemode datafield
341 \& \-\-molnamereplace always \-\-molname Mol_ID \-\-mode both
342 \& \-\-datafieldsmap
343 \& "PUBCHEM_EXT_DATASOURCE_REGID,Mol_ID;
344 \& PUBCHEM_SUBSTANCE_SYNONYM,Name,CompoundName"
345 \& \-\-datafieldscommon
346 \& "PUBCHEM_EXT_SUBSTANCE_URL,domain.org"
347 \& \-\-datafieldURL "PUBCHEM_EXT_DATASOURCE_URL,
348 \& http://www.yourdomain.org/GetCmpd.pl,Reg_ID,Mol_ID"
349 \& \-r CmpdDataForPubChem \-o Sample1.sdf
350 .Ve
351 .PP
352 And to add a publication date and request a release date using
353 \&\s-1PUBCHEM_PUBLICATION_DATE\s0 and \s-1PUBCHEM_DEPOSITOR_RECORD_DATE\s0 data fields
354 along with all the required and optional data fields in earlier examples,
355 type:
356 .PP
357 .Vb 12
358 \& % ModifySDFilesDataFields.pl \-\-molnamemode datafield
359 \& \-\-molnamereplace always \-\-molname Mol_ID \-\-mode both
360 \& \-\-datafieldsmap
361 \& "PUBCHEM_EXT_DATASOURCE_REGID,Mol_ID;
362 \& PUBCHEM_SUBSTANCE_SYNONYM,Name,CompoundName"
363 \& \-\-datafieldURL "PUBCHEM_EXT_DATASOURCE_URL,
364 \& http://www.yourdomain.org/GetCmpd.pl,Reg_ID,Mol_ID"
365 \& \-\-datafieldscommon
366 \& "PUBCHEM_EXT_SUBSTANCE_URL,domain.org,
367 \& PUBCHEM_PUBLICATION_DATE,YYYY\-MM\-DD,
368 \& PUBCHEM_DEPOSITOR_RECORD_DATE,YYYY\-MM\-DD"
369 \& \-r CmpdDataForPubChem \-o Sample1.sdf
370 .Ve
371 .SH "AUTHOR"
372 .IX Header "AUTHOR"
373 Manish Sud <msud@san.rr.com>
374 .SH "SEE ALSO"
375 .IX Header "SEE ALSO"
376 InfoSDFiles.pl, JoinSDFiles.pl, MergeTextFilesWithSD.pl, SplitSDFiles.pl, SDFilesToHTML.pl
377 .SH "COPYRIGHT"
378 .IX Header "COPYRIGHT"
379 Copyright (C) 2015 Manish Sud. All rights reserved.
380 .PP
381 This file is part of MayaChemTools.
382 .PP
383 MayaChemTools is free software; you can redistribute it and/or modify it under
384 the terms of the \s-1GNU\s0 Lesser General Public License as published by the Free
385 Software Foundation; either version 3 of the License, or (at your option)
386 any later version.