comparison docs/scripts/man1/MACCSKeysFingerprints.1 @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 .\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.22)
2 .\"
3 .\" Standard preamble:
4 .\" ========================================================================
5 .de Sp \" Vertical space (when we can't use .PP)
6 .if t .sp .5v
7 .if n .sp
8 ..
9 .de Vb \" Begin verbatim text
10 .ft CW
11 .nf
12 .ne \\$1
13 ..
14 .de Ve \" End verbatim text
15 .ft R
16 .fi
17 ..
18 .\" Set up some character translations and predefined strings. \*(-- will
19 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
20 .\" double quote, and \*(R" will give a right double quote. \*(C+ will
21 .\" give a nicer C++. Capital omega is used to do unbreakable dashes and
22 .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
23 .\" nothing in troff, for use with C<>.
24 .tr \(*W-
25 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
26 .ie n \{\
27 . ds -- \(*W-
28 . ds PI pi
29 . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
30 . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
31 . ds L" ""
32 . ds R" ""
33 . ds C` ""
34 . ds C' ""
35 'br\}
36 .el\{\
37 . ds -- \|\(em\|
38 . ds PI \(*p
39 . ds L" ``
40 . ds R" ''
41 'br\}
42 .\"
43 .\" Escape single quotes in literal strings from groff's Unicode transform.
44 .ie \n(.g .ds Aq \(aq
45 .el .ds Aq '
46 .\"
47 .\" If the F register is turned on, we'll generate index entries on stderr for
48 .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
49 .\" entries marked with X<> in POD. Of course, you'll have to process the
50 .\" output yourself in some meaningful fashion.
51 .ie \nF \{\
52 . de IX
53 . tm Index:\\$1\t\\n%\t"\\$2"
54 ..
55 . nr % 0
56 . rr F
57 .\}
58 .el \{\
59 . de IX
60 ..
61 .\}
62 .\"
63 .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
64 .\" Fear. Run. Save yourself. No user-serviceable parts.
65 . \" fudge factors for nroff and troff
66 .if n \{\
67 . ds #H 0
68 . ds #V .8m
69 . ds #F .3m
70 . ds #[ \f1
71 . ds #] \fP
72 .\}
73 .if t \{\
74 . ds #H ((1u-(\\\\n(.fu%2u))*.13m)
75 . ds #V .6m
76 . ds #F 0
77 . ds #[ \&
78 . ds #] \&
79 .\}
80 . \" simple accents for nroff and troff
81 .if n \{\
82 . ds ' \&
83 . ds ` \&
84 . ds ^ \&
85 . ds , \&
86 . ds ~ ~
87 . ds /
88 .\}
89 .if t \{\
90 . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
91 . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
92 . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
93 . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
94 . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
95 . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
96 .\}
97 . \" troff and (daisy-wheel) nroff accents
98 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
99 .ds 8 \h'\*(#H'\(*b\h'-\*(#H'
100 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
101 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
102 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
103 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
104 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
105 .ds ae a\h'-(\w'a'u*4/10)'e
106 .ds Ae A\h'-(\w'A'u*4/10)'E
107 . \" corrections for vroff
108 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
109 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
110 . \" for low resolution devices (crt and lpr)
111 .if \n(.H>23 .if \n(.V>19 \
112 \{\
113 . ds : e
114 . ds 8 ss
115 . ds o a
116 . ds d- d\h'-1'\(ga
117 . ds D- D\h'-1'\(hy
118 . ds th \o'bp'
119 . ds Th \o'LP'
120 . ds ae ae
121 . ds Ae AE
122 .\}
123 .rm #[ #] #H #V #F C
124 .\" ========================================================================
125 .\"
126 .IX Title "MACCSKEYSFINGERPRINTS 1"
127 .TH MACCSKEYSFINGERPRINTS 1 "2015-03-29" "perl v5.14.2" "MayaChemTools"
128 .\" For nroff, turn off justification. Always turn off hyphenation; it makes
129 .\" way too many mistakes in technical documents.
130 .if n .ad l
131 .nh
132 .SH "NAME"
133 MACCSKeysFingerprints.pl \- Generate MACCS key fingerprints for SD files
134 .SH "SYNOPSIS"
135 .IX Header "SYNOPSIS"
136 MACCSKeysFingerprints.pl SDFile(s)...
137 .PP
138 MACCSKeysFingerprints.pl [\fB\-\-AromaticityModel\fR \fIAromaticityModelType\fR]
139 [\fB\-\-BitsOrder\fR \fIAscending | Descending\fR]
140 [\fB\-b, \-\-BitStringFormat\fR \fIBinaryString | HexadecimalString\fR]
141 [\fB\-\-CompoundID\fR \fIDataFieldName or LabelPrefixString\fR] [\fB\-\-CompoundIDLabel\fR \fItext\fR]
142 [\fB\-\-CompoundIDMode\fR \fIDataField | MolName | LabelPrefix | MolNameOrLabelPrefix\fR]
143 [\fB\-\-DataFields\fR \fI\*(L"FieldLabel1,FieldLabel2,...\*(R"\fR] [\fB\-d, \-\-DataFieldsMode\fR \fIAll | Common | Specify | CompoundID\fR]
144 [\fB\-f, \-\-Filter\fR \fIYes | No\fR] [\fB\-\-FingerprintsLabel\fR \fItext\fR] [\fB\-h, \-\-help\fR] [\fB\-k, \-\-KeepLargestComponent\fR \fIYes | No\fR]
145 [\fB\-m, \-\-mode\fR \fIMACCSKeyBits | MACCSKeyCount\fR] [\fB\-\-OutDelim\fR \fIcomma | tab | semicolon\fR]
146 [\fB\-\-output\fR \fI\s-1SD\s0 | \s-1FP\s0 | text | all\fR] [\fB\-o, \-\-overwrite\fR]
147 [\fB\-q, \-\-quote\fR \fIYes | No\fR] [\fB\-r, \-\-root\fR \fIRootName\fR] [\fB\-s, \-\-size\fR \fInumber\fR]
148 [\fB\-v, \-\-VectorStringFormat\fR \fIIDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString\fR]
149 [\fB\-w, \-\-WorkingDir\fR \fIDirName\fR]
150 .SH "DESCRIPTION"
151 .IX Header "DESCRIPTION"
152 Generate \s-1MACCS\s0 (Molecular ACCess System) keys fingerprints [ Ref 45\-47 ] for \fISDFile(s)\fR
153 and create appropriate \s-1SD\s0, \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) containing fingerprints bit-vector or
154 vector strings corresponding to molecular fingerprints.
155 .PP
156 Multiple SDFile names are separated by spaces. The valid file extensions are \fI.sdf\fR
157 and \fI.sd\fR. All other file names are ignored. All the \s-1SD\s0 files in the current directory
158 can be specified either by \fI*.sdf\fR or the current directory name.
159 .PP
160 For each \s-1MACCS\s0 keys definition, atoms are processed to determine their membership to the key
161 and the appropriate molecular fingerprints strings are generated. An atom can belong to multiple
162 \&\s-1MACCS\s0 keys.
163 .PP
164 For \fIMACCSKeyBits\fR value of \fB\-m, \-\-mode\fR option, a fingerprint bit-vector string containing
165 zeros and ones is generated and for \fIMACCSKeyCount\fR value, a fingerprint vector string
166 corresponding to number of \s-1MACCS\s0 keys [ Ref 45\-47 ] is generated.
167 .PP
168 \&\fIMACCSKeyBits | MACCSKeyCount\fR values for \fB\-m, \-\-mode\fR option along with two possible
169 \&\fI166 | 322\fR values of \fB\-s, \-\-size\fR support generation of four different types of \s-1MACCS\s0
170 keys fingerprint: \fIMACCS166KeyBits, MACCS166KeyCount, MACCS322KeyBits, MACCS322KeyCount\fR.
171 .PP
172 Example of \fI\s-1SD\s0\fR file containing \s-1MACCS\s0 keys fingerprints string data:
173 .PP
174 .Vb 10
175 \& ... ...
176 \& ... ...
177 \& $$$$
178 \& ... ...
179 \& ... ...
180 \& ... ...
181 \& 41 44 0 0 0 0 0 0 0 0999 V2000
182 \& \-3.3652 1.4499 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
183 \& ... ...
184 \& 2 3 1 0 0 0 0
185 \& ... ...
186 \& M END
187 \& > <CmpdID>
188 \& Cmpd1
189 \&
190 \& > <MACCSKeysFingerprints>
191 \& FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;000000000
192 \& 00000000000000000000000000000000100100001001000000001001000000001110001
193 \& 00101010111100011011000100110110000011011110100110111111111111011111111
194 \& 11111111110111000
195 \&
196 \& $$$$
197 \& ... ...
198 \& ... ...
199 .Ve
200 .PP
201 Example of \fI\s-1FP\s0\fR file containing \s-1MACCS\s0 keys fingerprints string data:
202 .PP
203 .Vb 10
204 \& #
205 \& # Package = MayaChemTools 7.4
206 \& # Release Date = Oct 21, 2010
207 \& #
208 \& # TimeStamp = Fri Mar 11 14:57:24 2011
209 \& #
210 \& # FingerprintsStringType = FingerprintsBitVector
211 \& #
212 \& # Description = MACCSKeyBits
213 \& # Size = 166
214 \& # BitStringFormat = BinaryString
215 \& # BitsOrder = Ascending
216 \& #
217 \& Cmpd1 00000000000000000000000000000000000000000100100001001000000001...
218 \& Cmpd2 00000000000000000000000010000000001000000010000000001000000000...
219 \& ... ...
220 \& ... ..
221 .Ve
222 .PP
223 Example of \s-1CSV\s0 \fIText\fR file containing \s-1MACCS\s0 keys fingerprints string data:
224 .PP
225 .Vb 7
226 \& "CompoundID","MACCSKeysFingerprints"
227 \& "Cmpd1","FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;
228 \& 00000000000000000000000000000000000000000100100001001000000001001000000
229 \& 00111000100101010111100011011000100110110000011011110100110111111111111
230 \& 01111111111111111110111000"
231 \& ... ...
232 \& ... ...
233 .Ve
234 .PP
235 The current release of MayaChemTools generates the following types of \s-1MACCS\s0 keys
236 fingerprints bit-vector and vector strings:
237 .PP
238 .Vb 4
239 \& FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000
240 \& 0000000000000000000000000000000001001000010010000000010010000000011100
241 \& 0100101010111100011011000100110110000011011110100110111111111111011111
242 \& 11111111111110111000
243 \&
244 \& FingerprintsBitVector;MACCSKeyBits;166;HexadecimalString;Ascending;000
245 \& 000000021210210e845f8d8c60b79dffbffffd1
246 \&
247 \& FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011
248 \& 1110011111100101111111000111101100110000000000000011100010000000000000
249 \& 0000000000000000000000000000000000000000000000101000000000000000000000
250 \& 0000000000000000000000000000000000000000000000000000000000000000000000
251 \& 0000000000000000000000000000000000000011000000000000000000000000000000
252 \& 0000000000000000000000000000000000000000
253 \&
254 \& FingerprintsBitVector;MACCSKeyBits;322;HexadecimalString;Ascending;7d7
255 \& e7af3edc000c1100000000000000500000000000000000000000000000000300000000
256 \& 000000000
257 \&
258 \& FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri
259 \& ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
260 \& 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0
261 \& 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0
262 \& 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1
263 \& 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1
264 \&
265 \& FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri
266 \& ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0
267 \& 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0
268 \& 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
269 \& 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0
270 \& 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
271 .Ve
272 .SH "OPTIONS"
273 .IX Header "OPTIONS"
274 .IP "\fB\-\-AromaticityModel\fR \fIMDLAromaticityModel | TriposAromaticityModel | MMFFAromaticityModel | ChemAxonBasicAromaticityModel | ChemAxonGeneralAromaticityModel | DaylightAromaticityModel | MayaChemToolsAromaticityModel\fR" 4
275 .IX Item "--AromaticityModel MDLAromaticityModel | TriposAromaticityModel | MMFFAromaticityModel | ChemAxonBasicAromaticityModel | ChemAxonGeneralAromaticityModel | DaylightAromaticityModel | MayaChemToolsAromaticityModel"
276 Specify aromaticity model to use during detection of aromaticity. Possible values in the current
277 release are: \fIMDLAromaticityModel, TriposAromaticityModel, MMFFAromaticityModel,
278 ChemAxonBasicAromaticityModel, ChemAxonGeneralAromaticityModel, DaylightAromaticityModel
279 or MayaChemToolsAromaticityModel\fR. Default value: \fIMayaChemToolsAromaticityModel\fR.
280 .Sp
281 The supported aromaticity model names along with model specific control parameters
282 are defined in \fBAromaticityModelsData.csv\fR, which is distributed with the current release
283 and is available under \fBlib/data\fR directory. \fBMolecule.pm\fR module retrieves data from
284 this file during class instantiation and makes it available to method \fBDetectAromaticity\fR
285 for detecting aromaticity corresponding to a specific model.
286 .IP "\fB\-\-BitsOrder\fR \fIAscending | Descending\fR" 4
287 .IX Item "--BitsOrder Ascending | Descending"
288 Bits order to use during generation of fingerprints bit-vector string for \fIMACCSKeyBits\fR value of
289 \&\fB\-m, \-\-mode\fR option. Possible values: \fIAscending, Descending\fR. Default: \fIAscending\fR.
290 .Sp
291 \&\fIAscending\fR bit order which corresponds to first bit in each byte as the lowest bit as
292 opposed to the highest bit.
293 .Sp
294 Internally, bits are stored in \fIAscending\fR order using Perl vec function. Regardless
295 of machine order, big-endian or little-endian, vec function always considers first
296 string byte as the lowest byte and first bit within each byte as the lowest bit.
297 .IP "\fB\-b, \-\-BitStringFormat\fR \fIBinaryString | HexadecimalString\fR" 4
298 .IX Item "-b, --BitStringFormat BinaryString | HexadecimalString"
299 Format of fingerprints bit-vector string data in output \s-1SD\s0, \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) specified by
300 \&\fB\-\-output\fR used during \fIMACCSKeyBits\fR value of \fB\-m, \-\-mode\fR option. Possible
301 values: \fIBinaryString, HexadecimalString\fR. Default value: \fIBinaryString\fR.
302 .Sp
303 \&\fIBinaryString\fR corresponds to an \s-1ASCII\s0 string containing 1s and 0s. \fIHexadecimalString\fR
304 contains bit values in \s-1ASCII\s0 hexadecimal format.
305 .Sp
306 Examples:
307 .Sp
308 .Vb 4
309 \& FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000
310 \& 0000000000000000000000000000000001001000010010000000010010000000011100
311 \& 0100101010111100011011000100110110000011011110100110111111111111011111
312 \& 11111111111110111000
313 \&
314 \& FingerprintsBitVector;MACCSKeyBits;166;HexadecimalString;Ascending;000
315 \& 000000021210210e845f8d8c60b79dffbffffd1
316 \&
317 \& FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011
318 \& 1110011111100101111111000111101100110000000000000011100010000000000000
319 \& 0000000000000000000000000000000000000000000000101000000000000000000000
320 \& 0000000000000000000000000000000000000000000000000000000000000000000000
321 \& 0000000000000000000000000000000000000011000000000000000000000000000000
322 \& 0000000000000000000000000000000000000000
323 \&
324 \& FingerprintsBitVector;MACCSKeyBits;322;HexadecimalString;Ascending;7d7
325 \& e7af3edc000c1100000000000000500000000000000000000000000000000300000000
326 \& 000000000
327 .Ve
328 .IP "\fB\-\-CompoundID\fR \fIDataFieldName or LabelPrefixString\fR" 4
329 .IX Item "--CompoundID DataFieldName or LabelPrefixString"
330 This value is \fB\-\-CompoundIDMode\fR specific and indicates how compound \s-1ID\s0 is generated.
331 .Sp
332 For \fIDataField\fR value of \fB\-\-CompoundIDMode\fR option, it corresponds to datafield label name
333 whose value is used as compound \s-1ID\s0; otherwise, it's a prefix string used for generating compound
334 IDs like LabelPrefixString<Number>. Default value, \fICmpd\fR, generates compound IDs which
335 look like Cmpd<Number>.
336 .Sp
337 Examples for \fIDataField\fR value of \fB\-\-CompoundIDMode\fR:
338 .Sp
339 .Vb 2
340 \& MolID
341 \& ExtReg
342 .Ve
343 .Sp
344 Examples for \fILabelPrefix\fR or \fIMolNameOrLabelPrefix\fR value of \fB\-\-CompoundIDMode\fR:
345 .Sp
346 .Vb 1
347 \& Compound
348 .Ve
349 .Sp
350 The value specified above generates compound IDs which correspond to Compound<Number>
351 instead of default value of Cmpd<Number>.
352 .IP "\fB\-\-CompoundIDLabel\fR \fItext\fR" 4
353 .IX Item "--CompoundIDLabel text"
354 Specify compound \s-1ID\s0 column label for \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) used during \fICompoundID\fR value
355 of \fB\-\-DataFieldsMode\fR option. Default: \fICompoundID\fR.
356 .IP "\fB\-\-CompoundIDMode\fR \fIDataField | MolName | LabelPrefix | MolNameOrLabelPrefix\fR" 4
357 .IX Item "--CompoundIDMode DataField | MolName | LabelPrefix | MolNameOrLabelPrefix"
358 Specify how to generate compound IDs and write to \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) along with generated
359 fingerprints for \fI\s-1FP\s0 | text | all\fR values of \fB\-\-output\fR option: use a \fISDFile(s)\fR datafield value;
360 use molname line from \fISDFile(s)\fR; generate a sequential \s-1ID\s0 with specific prefix; use combination
361 of both MolName and LabelPrefix with usage of LabelPrefix values for empty molname lines.
362 .Sp
363 Possible values: \fIDataField | MolName | LabelPrefix | MolNameOrLabelPrefix\fR.
364 Default: \fILabelPrefix\fR.
365 .Sp
366 For \fIMolNameOrLabelPrefix\fR value of \fB\-\-CompoundIDMode\fR, molname line in \fISDFile(s)\fR takes
367 precedence over sequential compound IDs generated using \fILabelPrefix\fR and only empty molname
368 values are replaced with sequential compound IDs.
369 .Sp
370 This is only used for \fICompoundID\fR value of \fB\-\-DataFieldsMode\fR option.
371 .ie n .IP "\fB\-\-DataFields\fR \fI""FieldLabel1,FieldLabel2,...""\fR" 4
372 .el .IP "\fB\-\-DataFields\fR \fI``FieldLabel1,FieldLabel2,...''\fR" 4
373 .IX Item "--DataFields FieldLabel1,FieldLabel2,..."
374 Comma delimited list of \fISDFile(s)\fR data fields to extract and write to \s-1CSV/TSV\s0 text file(s) along
375 with generated fingerprints for \fItext | all\fR values of \fB\-\-output\fR option.
376 .Sp
377 This is only used for \fISpecify\fR value of \fB\-\-DataFieldsMode\fR option.
378 .Sp
379 Examples:
380 .Sp
381 .Vb 2
382 \& Extreg
383 \& MolID,CompoundName
384 .Ve
385 .IP "\fB\-d, \-\-DataFieldsMode\fR \fIAll | Common | Specify | CompoundID\fR" 4
386 .IX Item "-d, --DataFieldsMode All | Common | Specify | CompoundID"
387 Specify how data fields in \fISDFile(s)\fR are transferred to output \s-1CSV/TSV\s0 text file(s) along
388 with generated fingerprints for \fItext | all\fR values of \fB\-\-output\fR option: transfer all \s-1SD\s0
389 data fields; transfer \s-1SD\s0 data fields common to all compounds; extract specified data fields;
390 generate a compound \s-1ID\s0 using molname line, a compound prefix, or a combination of both.
391 Possible values: \fIAll | Common | Specify | CompoundID\fR. Default value: \fICompoundID\fR.
392 .IP "\fB\-f, \-\-Filter\fR \fIYes | No\fR" 4
393 .IX Item "-f, --Filter Yes | No"
394 Specify whether to check and filter compound data in SDFile(s). Possible values: \fIYes or No\fR.
395 Default value: \fIYes\fR.
396 .Sp
397 By default, compound data is checked before calculating fingerprints and compounds containing
398 atom data corresponding to non-element symbols or no atom data are ignored.
399 .IP "\fB\-\-FingerprintsLabel\fR \fItext\fR" 4
400 .IX Item "--FingerprintsLabel text"
401 \&\s-1SD\s0 data label or text file column label to use for fingerprints string in output \s-1SD\s0 or
402 \&\s-1CSV/TSV\s0 text file(s) specified by \fB\-\-output\fR. Default value: \fIMACCSKeysFingerprints\fR.
403 .IP "\fB\-h, \-\-help\fR" 4
404 .IX Item "-h, --help"
405 Print this help message.
406 .IP "\fB\-k, \-\-KeepLargestComponent\fR \fIYes | No\fR" 4
407 .IX Item "-k, --KeepLargestComponent Yes | No"
408 Generate fingerprints for only the largest component in molecule. Possible values:
409 \&\fIYes or No\fR. Default value: \fIYes\fR.
410 .Sp
411 For molecules containing multiple connected components, fingerprints can be generated
412 in two different ways: use all connected components or just the largest connected
413 component. By default, all atoms except for the largest connected component are
414 deleted before generation of fingerprints.
415 .IP "\fB\-m, \-\-mode\fR \fIMACCSKeyBits | MACCSKeyCount\fR" 4
416 .IX Item "-m, --mode MACCSKeyBits | MACCSKeyCount"
417 Specify type of \s-1MACCS\s0 keys [ Ref 45\-47 ] fingerprints to generate for molecules in \fISDFile(s)\fR.
418 Possible values: \fIMACCSKeyBits, MACCSKeyCount\fR. Default value: \fIMACCSKeyBits\fR.
419 .Sp
420 For \fIMACCSKeyBits\fR value of \fB\-m, \-\-mode\fR option, a fingerprint bit-vector string containing
421 zeros and ones is generated and for \fIMACCSKeyCount\fR value, a fingerprint vector string
422 corresponding to number of \s-1MACCS\s0 keys is generated.
423 .Sp
424 \&\fIMACCSKeyBits | MACCSKeyCount\fR values for \fB\-m, \-\-mode\fR option along with two possible
425 \&\fI166 | 322\fR values of \fB\-s, \-\-size\fR support generation of four different types of \s-1MACCS\s0
426 keys fingerprint: \fIMACCS166KeyBits, MACCS166KeyCount, MACCS322KeyBits, MACCS322KeyCount\fR.
427 .Sp
428 Definition of \s-1MACCS\s0 keys uses the following atom and bond symbols to define atom and
429 bond environments:
430 .Sp
431 .Vb 1
432 \& Atom symbols for 166 keys [ Ref 47 ]:
433 \&
434 \& A : Any valid periodic table element symbol
435 \& Q : Hetero atoms; any non\-C or non\-H atom
436 \& X : Halogens; F, Cl, Br, I
437 \& Z : Others; other than H, C, N, O, Si, P, S, F, Cl, Br, I
438 \&
439 \& Atom symbols for 322 keys [ Ref 46 ]:
440 \&
441 \& A : Any valid periodic table element symbol
442 \& Q : Hetero atoms; any non\-C or non\-H atom
443 \& X : Others; other than H, C, N, O, Si, P, S, F, Cl, Br, I
444 \& Z is neither defined nor used
445 \&
446 \& Bond types:
447 \&
448 \& \- : Single
449 \& = : Double
450 \& T : Triple
451 \& # : Triple
452 \& ~ : Single or double query bond
453 \& % : An aromatic query bond
454 \&
455 \& None : Any bond type; no explicit bond specified
456 \&
457 \& $ : Ring bond; $ before a bond type specifies ring bond
458 \& ! : Chain or non\-ring bond; ! before a bond type specifies chain bond
459 \&
460 \& @ : A ring linkage and the number following it specifies the
461 \& atoms position in the line, thus @1 means linked back to the first
462 \& atom in the list.
463 \&
464 \& Aromatic: Kekule or Arom5
465 \&
466 \& Kekule: Bonds in 6\-membered rings with alternate single/double bonds
467 \& or perimeter bonds
468 \& Arom5: Bonds in 5\-membered rings with two double bonds and a hetero
469 \& atom at the apex of the ring.
470 .Ve
471 .Sp
472 \&\s-1MACCS\s0 166 keys [ Ref 45\-47 ] are defined as follows:
473 .Sp
474 .Vb 1
475 \& Key Description
476 \&
477 \& 1 ISOTOPE
478 \& 2 103 < ATOMIC NO. < 256
479 \& 3 GROUP IVA,VA,VIA PERIODS 4\-6 (Ge...)
480 \& 4 ACTINIDE
481 \& 5 GROUP IIIB,IVB (Sc...)
482 \& 6 LANTHANIDE
483 \& 7 GROUP VB,VIB,VIIB (V...)
484 \& 8 QAAA@1
485 \& 9 GROUP VIII (Fe...)
486 \& 10 GROUP IIA (ALKALINE EARTH)
487 \& 11 4M RING
488 \& 12 GROUP IB,IIB (Cu...)
489 \& 13 ON(C)C
490 \& 14 S\-S
491 \& 15 OC(O)O
492 \& 16 QAA@1
493 \& 17 CTC
494 \& 18 GROUP IIIA (B...)
495 \& 19 7M RING
496 \& 20 SI
497 \& 21 C=C(Q)Q
498 \& 22 3M RING
499 \& 23 NC(O)O
500 \& 24 N\-O
501 \& 25 NC(N)N
502 \& 26 C$=C($A)$A
503 \& 27 I
504 \& 28 QCH2Q
505 \& 29 P
506 \& 30 CQ(C)(C)A
507 \& 31 QX
508 \& 32 CSN
509 \& 33 NS
510 \& 34 CH2=A
511 \& 35 GROUP IA (ALKALI METAL)
512 \& 36 S HETEROCYCLE
513 \& 37 NC(O)N
514 \& 38 NC(C)N
515 \& 39 OS(O)O
516 \& 40 S\-O
517 \& 41 CTN
518 \& 42 F
519 \& 43 QHAQH
520 \& 44 OTHER
521 \& 45 C=CN
522 \& 46 BR
523 \& 47 SAN
524 \& 48 OQ(O)O
525 \& 49 CHARGE
526 \& 50 C=C(C)C
527 \& 51 CSO
528 \& 52 NN
529 \& 53 QHAAAQH
530 \& 54 QHAAQH
531 \& 55 OSO
532 \& 56 ON(O)C
533 \& 57 O HETEROCYCLE
534 \& 58 QSQ
535 \& 59 Snot%A%A
536 \& 60 S=O
537 \& 61 AS(A)A
538 \& 62 A$A!A$A
539 \& 63 N=O
540 \& 64 A$A!S
541 \& 65 C%N
542 \& 66 CC(C)(C)A
543 \& 67 QS
544 \& 68 QHQH (&...)
545 \& 69 QQH
546 \& 70 QNQ
547 \& 71 NO
548 \& 72 OAAO
549 \& 73 S=A
550 \& 74 CH3ACH3
551 \& 75 A!N$A
552 \& 76 C=C(A)A
553 \& 77 NAN
554 \& 78 C=N
555 \& 79 NAAN
556 \& 80 NAAAN
557 \& 81 SA(A)A
558 \& 82 ACH2QH
559 \& 83 QAAAA@1
560 \& 84 NH2
561 \& 85 CN(C)C
562 \& 86 CH2QCH2
563 \& 87 X!A$A
564 \& 88 S
565 \& 89 OAAAO
566 \& 90 QHAACH2A
567 \& 91 QHAAACH2A
568 \& 92 OC(N)C
569 \& 93 QCH3
570 \& 94 QN
571 \& 95 NAAO
572 \& 96 5M RING
573 \& 97 NAAAO
574 \& 98 QAAAAA@1
575 \& 99 C=C
576 \& 100 ACH2N
577 \& 101 8M RING
578 \& 102 QO
579 \& 103 CL
580 \& 104 QHACH2A
581 \& 105 A$A($A)$A
582 \& 106 QA(Q)Q
583 \& 107 XA(A)A
584 \& 108 CH3AAACH2A
585 \& 109 ACH2O
586 \& 110 NCO
587 \& 111 NACH2A
588 \& 112 AA(A)(A)A
589 \& 113 Onot%A%A
590 \& 114 CH3CH2A
591 \& 115 CH3ACH2A
592 \& 116 CH3AACH2A
593 \& 117 NAO
594 \& 118 ACH2CH2A > 1
595 \& 119 N=A
596 \& 120 HETEROCYCLIC ATOM > 1 (&...)
597 \& 121 N HETEROCYCLE
598 \& 122 AN(A)A
599 \& 123 OCO
600 \& 124 QQ
601 \& 125 AROMATIC RING > 1
602 \& 126 A!O!A
603 \& 127 A$A!O > 1 (&...)
604 \& 128 ACH2AAACH2A
605 \& 129 ACH2AACH2A
606 \& 130 QQ > 1 (&...)
607 \& 131 QH > 1
608 \& 132 OACH2A
609 \& 133 A$A!N
610 \& 134 X (HALOGEN)
611 \& 135 Nnot%A%A
612 \& 136 O=A > 1
613 \& 137 HETEROCYCLE
614 \& 138 QCH2A > 1 (&...)
615 \& 139 OH
616 \& 140 O > 3 (&...)
617 \& 141 CH3 > 2 (&...)
618 \& 142 N > 1
619 \& 143 A$A!O
620 \& 144 Anot%A%Anot%A
621 \& 145 6M RING > 1
622 \& 146 O > 2
623 \& 147 ACH2CH2A
624 \& 148 AQ(A)A
625 \& 149 CH3 > 1
626 \& 150 A!A$A!A
627 \& 151 NH
628 \& 152 OC(C)C
629 \& 153 QCH2A
630 \& 154 C=O
631 \& 155 A!CH2!A
632 \& 156 NA(A)A
633 \& 157 C\-O
634 \& 158 C\-N
635 \& 159 O > 1
636 \& 160 CH3
637 \& 161 N
638 \& 162 AROMATIC
639 \& 163 6M RING
640 \& 164 O
641 \& 165 RING
642 \& 166 FRAGMENTS
643 .Ve
644 .Sp
645 \&\s-1MACCS\s0 322 keys set as defined in tables 1, 2 and 3 [ Ref 46 ] include:
646 .Sp
647 .Vb 3
648 \& . 26 atom properties of type P, as listed in Table 1
649 \& . 32 one\-atom environments, as listed in Table 3
650 \& . 264 atom\-bond\-atom combinations listed in Table 4
651 .Ve
652 .Sp
653 Total number of keys in three tables is: 322
654 .Sp
655 Atom symbol, X, used for 322 keys [ Ref 46 ] doesn't refer to Halogens as it does for 166 keys. In
656 order to keep the definition of 322 keys consistent with the published definitions, the symbol X is
657 used to imply \*(L"others\*(R" atoms, but it's internally mapped to symbol Z as defined for 166 keys
658 during the generation of key values.
659 .Sp
660 Atom properties-based keys (26):
661 .Sp
662 .Vb 10
663 \& Key Description
664 \& 1 A(AAA) or AA(A)A \- atom with at least three neighbors
665 \& 2 Q \- heteroatom
666 \& 3 Anot%not\-A \- atom involved in one or more multiple bonds, not aromatic
667 \& 4 A(AAAA) or AA(A)(A)A \- atom with at least four neighbors
668 \& 5 A(QQ) or QA(Q) \- atom with at least two heteroatom neighbors
669 \& 6 A(QQQ) or QA(Q)Q \- atom with at least three heteroatom neighbors
670 \& 7 QH \- heteroatom with at least one hydrogen attached
671 \& 8 CH2(AA) or ACH2A \- carbon with at least two single bonds and at least
672 \& two hydrogens attached
673 \& 9 CH3(A) or ACH3 \- carbon with at least one single bond and at least three
674 \& hydrogens attached
675 \& 10 Halogen
676 \& 11 A(\-A\-A\-A) or A\-A(\-A)\-A \- atom has at least three single bonds
677 \& 12 AAAAAA@1 > 2 \- atom is in at least two different six\-membered rings
678 \& 13 A($A$A$A) or A$A($A)$A \- atom has more than two ring bonds
679 \& 14 A$A!A$A \- atom is at a ring/chain boundary. When a comparison is done
680 \& with another atom the path passes through the chain bond.
681 \& 15 Anot%A%Anot%A \- atom is at an aromatic/nonaromatic boundary. When a
682 \& comparison is done with another atom the path
683 \& passes through the aromatic bond.
684 \& 16 A!A!A \- atom with more than one chain bond
685 \& 17 A!A$A!A \- atom is at a ring/chain boundary. When a comparison is done
686 \& with another atom the path passes through the ring bond.
687 \& 18 A%Anot%A%A \- atom is at an aromatic/nonaromatic boundary. When a
688 \& comparison is done with another atom the
689 \& path passes through the nonaromatic bond.
690 \& 19 HETEROCYCLE \- atom is a heteroatom in a ring.
691 \& 20 rare properties: atom with five or more neighbors, atom in
692 \& four or more rings, or atom types other than
693 \& H, C, N, O, S, F, Cl, Br, or I
694 \& 21 rare properties: atom has a charge, is an isotope, has two or
695 \& more multiple bonds, or has a triple bond.
696 \& 22 N \- nitrogen
697 \& 23 S \- sulfur
698 \& 24 O \- oxygen
699 \& 25 A(AA)A(A)A(AA) \- atom has two neighbors, each with three or
700 \& more neighbors (including the central atom).
701 \& 26 CHACH2 \- atom has two hydrocarbon (CH2) neighbors
702 .Ve
703 .Sp
704 Atomic environments properties-based keys (32):
705 .Sp
706 .Vb 10
707 \& Key Description
708 \& 27 C(CC)
709 \& 28 C(CCC)
710 \& 29 C(CN)
711 \& 30 C(CCN)
712 \& 31 C(NN)
713 \& 32 C(NNC)
714 \& 33 C(NNN)
715 \& 34 C(CO)
716 \& 35 C(CCO)
717 \& 36 C(NO)
718 \& 37 C(NCO)
719 \& 38 C(NNO)
720 \& 39 C(OO)
721 \& 40 C(COO)
722 \& 41 C(NOO)
723 \& 42 C(OOO)
724 \& 43 Q(CC)
725 \& 44 Q(CCC)
726 \& 45 Q(CN)
727 \& 46 Q(CCN)
728 \& 47 Q(NN)
729 \& 48 Q(CNN)
730 \& 49 Q(NNN)
731 \& 50 Q(CO)
732 \& 51 Q(CCO)
733 \& 52 Q(NO)
734 \& 53 Q(CNO)
735 \& 54 Q(NNO)
736 \& 55 Q(OO)
737 \& 56 Q(COO)
738 \& 57 Q(NOO)
739 \& 58 Q(OOO)
740 .Ve
741 .Sp
742 Note: The first symbol is the central atom, with atoms bonded to the central atom listed in
743 parentheses. Q is any non-C, non-H atom. If only two atoms are in parentheses, there is
744 no implication concerning the other atoms bonded to the central atom.
745 .Sp
746 Atom-Bond-Atom properties-based keys: (264)
747 .Sp
748 .Vb 10
749 \& Key Description
750 \& 59 C\-C
751 \& 60 C\-N
752 \& 61 C\-O
753 \& 62 C\-S
754 \& 63 C\-Cl
755 \& 64 C\-P
756 \& 65 C\-F
757 \& 66 C\-Br
758 \& 67 C\-Si
759 \& 68 C\-I
760 \& 69 C\-X
761 \& 70 N\-N
762 \& 71 N\-O
763 \& 72 N\-S
764 \& 73 N\-Cl
765 \& 74 N\-P
766 \& 75 N\-F
767 \& 76 N\-Br
768 \& 77 N\-Si
769 \& 78 N\-I
770 \& 79 N\-X
771 \& 80 O\-O
772 \& 81 O\-S
773 \& 82 O\-Cl
774 \& 83 O\-P
775 \& 84 O\-F
776 \& 85 O\-Br
777 \& 86 O\-Si
778 \& 87 O\-I
779 \& 88 O\-X
780 \& 89 S\-S
781 \& 90 S\-Cl
782 \& 91 S\-P
783 \& 92 S\-F
784 \& 93 S\-Br
785 \& 94 S\-Si
786 \& 95 S\-I
787 \& 96 S\-X
788 \& 97 Cl\-Cl
789 \& 98 Cl\-P
790 \& 99 Cl\-F
791 \& 100 Cl\-Br
792 \& 101 Cl\-Si
793 \& 102 Cl\-I
794 \& 103 Cl\-X
795 \& 104 P\-P
796 \& 105 P\-F
797 \& 106 P\-Br
798 \& 107 P\-Si
799 \& 108 P\-I
800 \& 109 P\-X
801 \& 110 F\-F
802 \& 111 F\-Br
803 \& 112 F\-Si
804 \& 113 F\-I
805 \& 114 F\-X
806 \& 115 Br\-Br
807 \& 116 Br\-Si
808 \& 117 Br\-I
809 \& 118 Br\-X
810 \& 119 Si\-Si
811 \& 120 Si\-I
812 \& 121 Si\-X
813 \& 122 I\-I
814 \& 123 I\-X
815 \& 124 X\-X
816 \& 125 C=C
817 \& 126 C=N
818 \& 127 C=O
819 \& 128 C=S
820 \& 129 C=Cl
821 \& 130 C=P
822 \& 131 C=F
823 \& 132 C=Br
824 \& 133 C=Si
825 \& 134 C=I
826 \& 135 C=X
827 \& 136 N=N
828 \& 137 N=O
829 \& 138 N=S
830 \& 139 N=Cl
831 \& 140 N=P
832 \& 141 N=F
833 \& 142 N=Br
834 \& 143 N=Si
835 \& 144 N=I
836 \& 145 N=X
837 \& 146 O=O
838 \& 147 O=S
839 \& 148 O=Cl
840 \& 149 O=P
841 \& 150 O=F
842 \& 151 O=Br
843 \& 152 O=Si
844 \& 153 O=I
845 \& 154 O=X
846 \& 155 S=S
847 \& 156 S=Cl
848 \& 157 S=P
849 \& 158 S=F
850 \& 159 S=Br
851 \& 160 S=Si
852 \& 161 S=I
853 \& 162 S=X
854 \& 163 Cl=Cl
855 \& 164 Cl=P
856 \& 165 Cl=F
857 \& 166 Cl=Br
858 \& 167 Cl=Si
859 \& 168 Cl=I
860 \& 169 Cl=X
861 \& 170 P=P
862 \& 171 P=F
863 \& 172 P=Br
864 \& 173 P=Si
865 \& 174 P=I
866 \& 175 P=X
867 \& 176 F=F
868 \& 177 F=Br
869 \& 178 F=Si
870 \& 179 F=I
871 \& 180 F=X
872 \& 181 Br=Br
873 \& 182 Br=Si
874 \& 183 Br=I
875 \& 184 Br=X
876 \& 185 Si=Si
877 \& 186 Si=I
878 \& 187 Si=X
879 \& 188 I=I
880 \& 189 I=X
881 \& 190 X=X
882 \& 191 C#C
883 \& 192 C#N
884 \& 193 C#O
885 \& 194 C#S
886 \& 195 C#Cl
887 \& 196 C#P
888 \& 197 C#F
889 \& 198 C#Br
890 \& 199 C#Si
891 \& 200 C#I
892 \& 201 C#X
893 \& 202 N#N
894 \& 203 N#O
895 \& 204 N#S
896 \& 205 N#Cl
897 \& 206 N#P
898 \& 207 N#F
899 \& 208 N#Br
900 \& 209 N#Si
901 \& 210 N#I
902 \& 211 N#X
903 \& 212 O#O
904 \& 213 O#S
905 \& 214 O#Cl
906 \& 215 O#P
907 \& 216 O#F
908 \& 217 O#Br
909 \& 218 O#Si
910 \& 219 O#I
911 \& 220 O#X
912 \& 221 S#S
913 \& 222 S#Cl
914 \& 223 S#P
915 \& 224 S#F
916 \& 225 S#Br
917 \& 226 S#Si
918 \& 227 S#I
919 \& 228 S#X
920 \& 229 Cl#Cl
921 \& 230 Cl#P
922 \& 231 Cl#F
923 \& 232 Cl#Br
924 \& 233 Cl#Si
925 \& 234 Cl#I
926 \& 235 Cl#X
927 \& 236 P#P
928 \& 237 P#F
929 \& 238 P#Br
930 \& 239 P#Si
931 \& 240 P#I
932 \& 241 P#X
933 \& 242 F#F
934 \& 243 F#Br
935 \& 244 F#Si
936 \& 245 F#I
937 \& 246 F#X
938 \& 247 Br#Br
939 \& 248 Br#Si
940 \& 249 Br#I
941 \& 250 Br#X
942 \& 251 Si#Si
943 \& 252 Si#I
944 \& 253 Si#X
945 \& 254 I#I
946 \& 255 I#X
947 \& 256 X#X
948 \& 257 C$C
949 \& 258 C$N
950 \& 259 C$O
951 \& 260 C$S
952 \& 261 C$Cl
953 \& 262 C$P
954 \& 263 C$F
955 \& 264 C$Br
956 \& 265 C$Si
957 \& 266 C$I
958 \& 267 C$X
959 \& 268 N$N
960 \& 269 N$O
961 \& 270 N$S
962 \& 271 N$Cl
963 \& 272 N$P
964 \& 273 N$F
965 \& 274 N$Br
966 \& 275 N$Si
967 \& 276 N$I
968 \& 277 N$X
969 \& 278 O$O
970 \& 279 O$S
971 \& 280 O$Cl
972 \& 281 O$P
973 \& 282 O$F
974 \& 283 O$Br
975 \& 284 O$Si
976 \& 285 O$I
977 \& 286 O$X
978 \& 287 S$S
979 \& 288 S$Cl
980 \& 289 S$P
981 \& 290 S$F
982 \& 291 S$Br
983 \& 292 S$Si
984 \& 293 S$I
985 \& 294 S$X
986 \& 295 Cl$Cl
987 \& 296 Cl$P
988 \& 297 Cl$F
989 \& 298 Cl$Br
990 \& 299 Cl$Si
991 \& 300 Cl$I
992 \& 301 Cl$X
993 \& 302 P$P
994 \& 303 P$F
995 \& 304 P$Br
996 \& 305 P$Si
997 \& 306 P$I
998 \& 307 P$X
999 \& 308 F$F
1000 \& 309 F$Br
1001 \& 310 F$Si
1002 \& 311 F$I
1003 \& 312 F$X
1004 \& 313 Br$Br
1005 \& 314 Br$Si
1006 \& 315 Br$I
1007 \& 316 Br$X
1008 \& 317 Si$Si
1009 \& 318 Si$I
1010 \& 319 Si$X
1011 \& 320 I$I
1012 \& 321 I$X
1013 \& 322 X$X
1014 .Ve
1015 .IP "\fB\-\-OutDelim\fR \fIcomma | tab | semicolon\fR" 4
1016 .IX Item "--OutDelim comma | tab | semicolon"
1017 Delimiter for output \s-1CSV/TSV\s0 text file(s). Possible values: \fIcomma, tab, or semicolon\fR.
1018 Default value: \fIcomma\fR.
1019 .IP "\fB\-\-output\fR \fI\s-1SD\s0 | \s-1FP\s0 | text | all\fR" 4
1020 .IX Item "--output SD | FP | text | all"
1021 Type of output files to generate. Possible values: \fI\s-1SD\s0, \s-1FP\s0, text, or all\fR. Default value: \fItext\fR.
1022 .IP "\fB\-o, \-\-overwrite\fR" 4
1023 .IX Item "-o, --overwrite"
1024 Overwrite existing files.
1025 .IP "\fB\-q, \-\-quote\fR \fIYes | No\fR" 4
1026 .IX Item "-q, --quote Yes | No"
1027 Put quotes around column values in output \s-1CSV/TSV\s0 text file(s). Possible values:
1028 \&\fIYes or No\fR. Default value: \fIYes\fR.
1029 .IP "\fB\-r, \-\-root\fR \fIRootName\fR" 4
1030 .IX Item "-r, --root RootName"
1031 New file name is generated using the root: <Root>.<Ext>. Default for new file
1032 names: <SDFileName><MACCSKeysFP>.<Ext>. The file type determines <Ext> value.
1033 The sdf, fpf, csv, and tsv <Ext> values are used for \s-1SD\s0, \s-1FP\s0, comma/semicolon, and tab
1034 delimited text files, respectively. This option is ignored for multiple input files.
1035 .IP "\fB\-s, \-\-size\fR \fInumber\fR" 4
1036 .IX Item "-s, --size number"
1037 Size of \s-1MACCS\s0 keys [ Ref 45\-47 ] set to use during fingerprints generation. Possible values: \fI166 or 322\fR.
1038 Default value: \fI166\fR.
1039 .IP "\fB\-v, \-\-VectorStringFormat\fR \fIValuesString | IDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString\fR" 4
1040 .IX Item "-v, --VectorStringFormat ValuesString | IDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString"
1041 Format of fingerprints vector string data in output \s-1SD\s0, \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) specified by
1042 \&\fB\-\-output\fR used during \fIMACCSKeyCount\fR value of \fB\-m, \-\-mode\fR option. Possible
1043 values: \fIValuesString, IDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString |
1044 ValuesAndIDsPairsString\fR. Default value: \fIValuesString\fR.
1045 .Sp
1046 Examples:
1047 .Sp
1048 .Vb 6
1049 \& FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri
1050 \& ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1051 \& 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0
1052 \& 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0
1053 \& 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1
1054 \& 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1
1055 \&
1056 \& FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri
1057 \& ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0
1058 \& 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0
1059 \& 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1060 \& 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0
1061 \& 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
1062 .Ve
1063 .IP "\fB\-w, \-\-WorkingDir\fR \fIDirName\fR" 4
1064 .IX Item "-w, --WorkingDir DirName"
1065 Location of working directory. Default: current directory.
1066 .SH "EXAMPLES"
1067 .IX Header "EXAMPLES"
1068 To generate \s-1MACCS\s0 keys fingerprints of size 166 in binary bit-vector string format
1069 and create a SampleMACCS166FPBin.csv file containing sequential compound IDs along with
1070 fingerprints bit-vector strings data, type:
1071 .PP
1072 .Vb 1
1073 \& % MACCSKeysFingerprints.pl \-r SampleMACCS166FPBin \-o Sample.sdf
1074 .Ve
1075 .PP
1076 To generate \s-1MACCS\s0 keys fingerprints of size 166 in binary bit-vector string format
1077 and create SampleMACCS166FPBin.sdf, SampleMACCS166FPBin.fpf and SampleMACCS166FPBin.csv
1078 files containing sequential compound IDs in \s-1CSV\s0 file along with fingerprints bit-vector strings data, type:
1079 .PP
1080 .Vb 2
1081 \& % MACCSKeysFingerprints.pl \-\-output all \-r SampleMACCS166FPBin
1082 \& \-o Sample.sdf
1083 .Ve
1084 .PP
1085 To generate \s-1MACCS\s0 keys fingerprints of size 322 in binary bit-vector string format
1086 and create a SampleMACCS322FPBin.csv file containing sequential compound IDs along with
1087 fingerprints bit-vector strings data, type:
1088 .PP
1089 .Vb 1
1090 \& % MACCSKeysFingerprints.pl \-\-size 322 \-r SampleMACCS322FPBin \-o Sample.sdf
1091 .Ve
1092 .PP
1093 To generate \s-1MACCS\s0 keys fingerprints of size 166 corresponding to count of keys in
1094 ValuesString format and create a SampleMACCS166FPCount.csv file containing sequential
1095 compound IDs along with fingerprints vector strings data, type:
1096 .PP
1097 .Vb 2
1098 \& % MACCSKeysFingerprints.pl \-m MACCSKeyCount \-r SampleMACCS166FPCount
1099 \& \-o Sample.sdf
1100 .Ve
1101 .PP
1102 To generate \s-1MACCS\s0 keys fingerprints of size 322 corresponding to count of keys in
1103 ValuesString format and create a SampleMACCS322FPCount.csv file containing sequential
1104 compound IDs along with fingerprints vector strings data, type:
1105 .PP
1106 .Vb 2
1107 \& % MACCSKeysFingerprints.pl \-m MACCSKeyCount \-\-size 322
1108 \& \-r SampleMACCS322FPCount \-o Sample.sdf
1109 .Ve
1110 .PP
1111 To generate \s-1MACCS\s0 keys fingerprints of size 166 in hexadecimal bit-vector string format with
1112 ascending bits order and create a SampleMACCS166FPHex.csv file containing compound IDs
1113 from MolName along with fingerprints bit-vector strings data, type:
1114 .PP
1115 .Vb 3
1116 \& % MACCSKeysFingerprints.pl \-m MACCSKeyBits \-\-size 166 \-\-BitStringFormat
1117 \& HexadecimalString \-\-BitsOrder Ascending \-\-DataFieldsMode CompoundID
1118 \& \-\-CompoundIDMode MolName \-r SampleMACCS166FPHex \-o Sample.sdf
1119 .Ve
1120 .PP
1121 To generate \s-1MACCS\s0 keys fingerprints of size 166 corresponding to count of keys in
1122 IDsAndValuesString format and create a SampleMACCS166FPCount.csv file containing
1123 compound IDs from MolName line along with fingerprints vector strings data, type:
1124 .PP
1125 .Vb 3
1126 \& % MACCSKeysFingerprints.pl \-m MACCSKeyCount \-\-size 166
1127 \& \-\-VectorStringFormat IDsAndValuesString \-\-DataFieldsMode CompoundID
1128 \& \-\-CompoundIDMode MolName \-r SampleMACCS166FPCount \-o Sample.sdf
1129 .Ve
1130 .PP
1131 To generate \s-1MACCS\s0 keys fingerprints of size 166 corresponding to count of keys in
1132 IDsAndValuesString format and create a SampleMACCS166FPCount.csv file containing
1133 compound IDs using specified data field along with fingerprints vector strings data, type:
1134 .PP
1135 .Vb 4
1136 \& % MACCSKeysFingerprints.pl \-m MACCSKeyCount \-\-size 166
1137 \& \-\-VectorStringFormat IDsAndValuesString \-\-DataFieldsMode CompoundID
1138 \& \-\-CompoundIDMode DataField \-\-CompoundID Mol_ID \-r
1139 \& SampleMACCS166FPCount \-o Sample.sdf
1140 .Ve
1141 .PP
1142 To generate \s-1MACCS\s0 keys fingerprints of size 322 corresponding to count of keys in
1143 ValuesString format and create a SampleMACCS322FPCount.tsv file containing compound
1144 IDs derived from combination of molecule name line and an explicit compound prefix
1145 along with fingerprints vector strings data in a column labeled MACCSKeyCountFP, type:
1146 .PP
1147 .Vb 4
1148 \& % MACCSKeysFingerprints.pl \-m MACCSKeyCount \-\-size 322 \-\-DataFieldsMode
1149 \& CompoundID \-\-CompoundIDMode MolnameOrLabelPrefix \-\-CompoundID Cmpd
1150 \& \-\-CompoundIDLabel MolID \-\-FingerprintsLabel MACCSKeyCountFP \-\-OutDelim
1151 \& Tab \-r SampleMACCS322FPCount \-o Sample.sdf
1152 .Ve
1153 .PP
1154 To generate \s-1MACCS\s0 keys fingerprints of size 166 corresponding to count of keys in
1155 ValuesString format and create a SampleMACCS166FPCount.csv file containing
1156 specific data fields columns along with fingerprints vector strings data, type:
1157 .PP
1158 .Vb 3
1159 \& % MACCSKeysFingerprints.pl \-m MACCSKeyCount \-\-size 166
1160 \& \-\-VectorStringFormat ValuesString \-\-DataFieldsMode Specify \-\-DataFields
1161 \& Mol_ID \-r SampleMACCS166FPCount \-o Sample.sdf
1162 .Ve
1163 .PP
1164 To generate \s-1MACCS\s0 keys fingerprints of size 322 corresponding to count of keys in
1165 ValuesString format and create a SampleMACCS322FPCount.csv file containing
1166 common data fields columns along with fingerprints vector strings data, type:
1167 .PP
1168 .Vb 3
1169 \& % MACCSKeysFingerprints.pl \-m MACCSKeyCount \-\-size 322
1170 \& \-\-VectorStringFormat ValuesString \-\-DataFieldsMode Common \-r
1171 \& SampleMACCS322FPCount \-o Sample.sdf
1172 .Ve
1173 .PP
1174 To generate \s-1MACCS\s0 keys fingerprints of size 166 corresponding to count of keys in
1175 ValuesString format and create SampleMACCS166FPCount.sdf, SampleMACCS166FPCount.fpf and
1176 SampleMACCS166FPCount.csv files containing all data fields columns in \s-1CSV\s0 file
1177 along with fingerprints vector strings data, type:
1178 .PP
1179 .Vb 3
1180 \& % MACCSKeysFingerprints.pl \-m MACCSKeyCount \-\-size 166 \-\-output all
1181 \& \-\-VectorStringFormat ValuesString \-\-DataFieldsMode All \-r
1182 \& SampleMACCS166FPCount \-o Sample.sdf
1183 .Ve
1184 .SH "AUTHOR"
1185 .IX Header "AUTHOR"
1186 Manish Sud <msud@san.rr.com>
1187 .SH "SEE ALSO"
1188 .IX Header "SEE ALSO"
1189 InfoFingerprintsFiles.pl, SimilarityMatricesFingerprints.pl, AtomNeighborhoodsFingerprints.pl,
1190 ExtendedConnectivityFingerprints.pl, PathLengthFingerprints.pl,
1191 TopologicalAtomPairsFingerprints.pl, TopologicalAtomTorsionsFingerprints.pl,
1192 TopologicalPharmacophoreAtomPairsFingerprints.pl, TopologicalPharmacophoreAtomTripletsFingerprints.pl
1193 .SH "COPYRIGHT"
1194 .IX Header "COPYRIGHT"
1195 Copyright (C) 2015 Manish Sud. All rights reserved.
1196 .PP
1197 This file is part of MayaChemTools.
1198 .PP
1199 MayaChemTools is free software; you can redistribute it and/or modify it under
1200 the terms of the \s-1GNU\s0 Lesser General Public License as published by the Free
1201 Software Foundation; either version 3 of the License, or (at your option)
1202 any later version.