0
|
1 NAME
|
|
2 MACCSKeysFingerprints.pl - Generate MACCS key fingerprints for SD files
|
|
3
|
|
4 SYNOPSIS
|
|
5 MACCSKeysFingerprints.pl SDFile(s)...
|
|
6
|
|
7 MACCSKeysFingerprints.pl [--AromaticityModel *AromaticityModelType*]
|
|
8 [--BitsOrder *Ascending | Descending*] [-b, --BitStringFormat
|
|
9 *BinaryString | HexadecimalString*] [--CompoundID *DataFieldName or
|
|
10 LabelPrefixString*] [--CompoundIDLabel *text*] [--CompoundIDMode
|
|
11 *DataField | MolName | LabelPrefix | MolNameOrLabelPrefix*]
|
|
12 [--DataFields *"FieldLabel1,FieldLabel2,..."*] [-d, --DataFieldsMode
|
|
13 *All | Common | Specify | CompoundID*] [-f, --Filter *Yes | No*]
|
|
14 [--FingerprintsLabel *text*] [-h, --help] [-k, --KeepLargestComponent
|
|
15 *Yes | No*] [-m, --mode *MACCSKeyBits | MACCSKeyCount*] [--OutDelim
|
|
16 *comma | tab | semicolon*] [--output *SD | FP | text | all*] [-o,
|
|
17 --overwrite] [-q, --quote *Yes | No*] [-r, --root *RootName*] [-s,
|
|
18 --size *number*] [-v, --VectorStringFormat *ValuesString | IDsAndValuesString |
|
|
19 IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString*]
|
|
20 [-w, --WorkingDir *DirName*]
|
|
21
|
|
22 DESCRIPTION
|
|
23 Generate MACCS (Molecular ACCess System) keys fingerprints [ Ref 45-47 ]
|
|
24 for *SDFile(s)* and create appropriate SD, FP or CSV/TSV text file(s)
|
|
25 containing fingerprints bit-vector or vector strings corresponding to
|
|
26 molecular fingerprints.
|
|
27
|
|
28 Multiple SDFile names are separated by spaces. The valid file extensions
|
|
29 are *.sdf* and *.sd*. All other file names are ignored. All the SD files
|
|
30 in a current directory can be specified either by **.sdf* or the current
|
|
31 directory name.
|
|
32
|
|
33 For each MACCS keys definition, atoms are processed to determine their
|
|
34 membership to the key and the appropriate molecular fingerprints strings
|
|
35 are generated. An atom can belong to multiple MACCS keys.
|
|
36
|
|
37 For *MACCSKeyBits* value of -m, --mode option, a fingerprint bit-vector
|
|
38 string containing zeros and ones is generated and for *MACCSKeyCount*
|
|
39 value, a fingerprint vector string corresponding to number of MACCS keys
|
|
40 [ Ref 45-47 ] is generated.
|
|
41
|
|
42 *MACCSKeyBits | MACCSKeyCount* values for -m, --mode option along with
|
|
43 two possible *166 | 322* values of -s, --size support generation of
|
|
44 four different types of MACCS keys fingerprint: *MACCS166KeyBits,
|
|
45 MACCS166KeyCount, MACCS322KeyBits, MACCS322KeyCount*.
|
|
46
|
|
47 Example of *SD* file containing MACCS keys fingerprints string data:
|
|
48
|
|
49 ... ...
|
|
50 ... ...
|
|
51 $$$$
|
|
52 ... ...
|
|
53 ... ...
|
|
54 ... ...
|
|
55 41 44 0 0 0 0 0 0 0 0999 V2000
|
|
56 -3.3652 1.4499 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
57 ... ...
|
|
58 2 3 1 0 0 0 0
|
|
59 ... ...
|
|
60 M END
|
|
61 > <CmpdID>
|
|
62 Cmpd1
|
|
63
|
|
64 > <MACCSKeysFingerprints>
|
|
65 FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;000000000
|
|
66 00000000000000000000000000000000100100001001000000001001000000001110001
|
|
67 00101010111100011011000100110110000011011110100110111111111111011111111
|
|
68 11111111110111000
|
|
69
|
|
70 $$$$
|
|
71 ... ...
|
|
72 ... ...
|
|
73
|
|
74 Example of *FP* file containing MACCS keys fingerprints string data:
|
|
75
|
|
76 #
|
|
77 # Package = MayaChemTools 7.4
|
|
78 # Release Date = Oct 21, 2010
|
|
79 #
|
|
80 # TimeStamp = Fri Mar 11 14:57:24 2011
|
|
81 #
|
|
82 # FingerprintsStringType = FingerprintsBitVector
|
|
83 #
|
|
84 # Description = MACCSKeyBits
|
|
85 # Size = 166
|
|
86 # BitStringFormat = BinaryString
|
|
87 # BitsOrder = Ascending
|
|
88 #
|
|
89 Cmpd1 00000000000000000000000000000000000000000100100001001000000001...
|
|
90 Cmpd2 00000000000000000000000010000000001000000010000000001000000000...
|
|
91 ... ...
|
|
92 ... ..
|
|
93
|
|
94 Example of CSV *Text* file containing MACCS keys fingerprints string
|
|
95 data:
|
|
96
|
|
97 "CompoundID","MACCSKeysFingerprints"
|
|
98 "Cmpd1","FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;
|
|
99 00000000000000000000000000000000000000000100100001001000000001001000000
|
|
100 00111000100101010111100011011000100110110000011011110100110111111111111
|
|
101 01111111111111111110111000"
|
|
102 ... ...
|
|
103 ... ...
|
|
104
|
|
105 The current release of MayaChemTools generates the following types of
|
|
106 MACCS keys fingerprints bit-vector and vector strings:
|
|
107
|
|
108 FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000
|
|
109 0000000000000000000000000000000001001000010010000000010010000000011100
|
|
110 0100101010111100011011000100110110000011011110100110111111111111011111
|
|
111 11111111111110111000
|
|
112
|
|
113 FingerprintsBitVector;MACCSKeyBits;166;HexadecimalString;Ascending;000
|
|
114 000000021210210e845f8d8c60b79dffbffffd1
|
|
115
|
|
116 FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011
|
|
117 1110011111100101111111000111101100110000000000000011100010000000000000
|
|
118 0000000000000000000000000000000000000000000000101000000000000000000000
|
|
119 0000000000000000000000000000000000000000000000000000000000000000000000
|
|
120 0000000000000000000000000000000000000011000000000000000000000000000000
|
|
121 0000000000000000000000000000000000000000
|
|
122
|
|
123 FingerprintsBitVector;MACCSKeyBits;322;HexadecimalString;Ascending;7d7
|
|
124 e7af3edc000c1100000000000000500000000000000000000000000000000300000000
|
|
125 000000000
|
|
126
|
|
127 FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri
|
|
128 ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
|
129 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0
|
|
130 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0
|
|
131 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1
|
|
132 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1
|
|
133
|
|
134 FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri
|
|
135 ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0
|
|
136 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0
|
|
137 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
|
138 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0
|
|
139 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
|
|
140
|
|
141 OPTIONS
|
|
142 --AromaticityModel *MDLAromaticityModel | TriposAromaticityModel |
|
|
143 MMFFAromaticityModel | ChemAxonBasicAromaticityModel |
|
|
144 ChemAxonGeneralAromaticityModel | DaylightAromaticityModel |
|
|
145 MayaChemToolsAromaticityModel*
|
|
146 Specify aromaticity model to use during detection of aromaticity.
|
|
147 Possible values in the current release are: *MDLAromaticityModel,
|
|
148 TriposAromaticityModel, MMFFAromaticityModel,
|
|
149 ChemAxonBasicAromaticityModel, ChemAxonGeneralAromaticityModel,
|
|
150 DaylightAromaticityModel or MayaChemToolsAromaticityModel*. Default
|
|
151 value: *MayaChemToolsAromaticityModel*.
|
|
152
|
|
153 The supported aromaticity model names along with model specific
|
|
154 control parameters are defined in AromaticityModelsData.csv, which
|
|
155 is distributed with the current release and is available under
|
|
156 lib/data directory. Molecule.pm module retrieves data from this file
|
|
157 during class instantiation and makes it available to method
|
|
158 DetectAromaticity for detecting aromaticity corresponding to a
|
|
159 specific model.
|
|
160
|
|
161 --BitsOrder *Ascending | Descending*
|
|
162 Bits order to use during generation of fingerprints bit-vector
|
|
163 string for *MACCSKeyBits* value of -m, --mode option. Possible
|
|
164 values: *Ascending, Descending*. Default: *Ascending*.
|
|
165
|
|
166 *Ascending* bit order corresponds to the first bit in each byte as
|
|
167 the lowest bit as opposed to the highest bit.
|
|
168
|
|
169 Internally, bits are stored in *Ascending* order using Perl vec
|
|
170 function. Regardless of machine order, big-endian or little-endian,
|
|
171 vec function always considers first string byte as the lowest byte
|
|
172 and first bit within each byte as the lowest bit.
|
|
173
|
|
174 -b, --BitStringFormat *BinaryString | HexadecimalString*
|
|
175 Format of fingerprints bit-vector string data in output SD, FP or
|
|
176 CSV/TSV text file(s) specified by --output used during
|
|
177 *MACCSKeyBits* value of -m, --mode option. Possible values:
|
|
178 *BinaryString, HexadecimalString*. Default value: *BinaryString*.
|
|
179
|
|
180 *BinaryString* corresponds to an ASCII string containing 1s and 0s.
|
|
181 *HexadecimalString* contains bit values in ASCII hexadecimal format.
|
|
182
|
|
183 Examples:
|
|
184
|
|
185 FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000
|
|
186 0000000000000000000000000000000001001000010010000000010010000000011100
|
|
187 0100101010111100011011000100110110000011011110100110111111111111011111
|
|
188 11111111111110111000
|
|
189
|
|
190 FingerprintsBitVector;MACCSKeyBits;166;HexadecimalString;Ascending;000
|
|
191 000000021210210e845f8d8c60b79dffbffffd1
|
|
192
|
|
193 FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011
|
|
194 1110011111100101111111000111101100110000000000000011100010000000000000
|
|
195 0000000000000000000000000000000000000000000000101000000000000000000000
|
|
196 0000000000000000000000000000000000000000000000000000000000000000000000
|
|
197 0000000000000000000000000000000000000011000000000000000000000000000000
|
|
198 0000000000000000000000000000000000000000
|
|
199
|
|
200 FingerprintsBitVector;MACCSKeyBits;322;HexadecimalString;Ascending;7d7
|
|
201 e7af3edc000c1100000000000000500000000000000000000000000000000300000000
|
|
202 000000000
|
|
203
|
|
204 --CompoundID *DataFieldName or LabelPrefixString*
|
|
205 This value is --CompoundIDMode specific and indicates how compound
|
|
206 ID is generated.
|
|
207
|
|
208 For *DataField* value of --CompoundIDMode option, it corresponds to
|
|
209 datafield label name whose value is used as compound ID; otherwise,
|
|
210 it's a prefix string used for generating compound IDs like
|
|
211 LabelPrefixString<Number>. Default value, *Cmpd*, generates compound
|
|
212 IDs which look like Cmpd<Number>.
|
|
213
|
|
214 Examples for *DataField* value of --CompoundIDMode:
|
|
215
|
|
216 MolID
|
|
217 ExtReg
|
|
218
|
|
219 Examples for *LabelPrefix* or *MolNameOrLabelPrefix* value of
|
|
220 --CompoundIDMode:
|
|
221
|
|
222 Compound
|
|
223
|
|
224 The value specified above generates compound IDs which correspond to
|
|
225 Compound<Number> instead of default value of Cmpd<Number>.
|
|
226
|
|
227 --CompoundIDLabel *text*
|
|
228 Specify compound ID column label for FP or CSV/TSV text file(s) used
|
|
229 during *CompoundID* value of --DataFieldsMode option. Default:
|
|
230 *CompoundID*.
|
|
231
|
|
232 --CompoundIDMode *DataField | MolName | LabelPrefix |
|
|
233 MolNameOrLabelPrefix*
|
|
234 Specify how to generate compound IDs and write to FP or CSV/TSV text
|
|
235 file(s) along with generated fingerprints for *FP | text | all*
|
|
236 values of --output option: use a *SDFile(s)* datafield value; use
|
|
237 molname line from *SDFile(s)*; generate a sequential ID with
|
|
238 specific prefix; use combination of both MolName and LabelPrefix
|
|
239 with usage of LabelPrefix values for empty molname lines.
|
|
240
|
|
241 Possible values: *DataField | MolName | LabelPrefix |
|
|
242 MolNameOrLabelPrefix*. Default: *LabelPrefix*.
|
|
243
|
|
244 For *MolNameOrLabelPrefix* value of --CompoundIDMode, molname line
|
|
245 in *SDFile(s)* takes precedence over sequential compound IDs
|
|
246 generated using *LabelPrefix* and only empty molname values are
|
|
247 replaced with sequential compound IDs.
|
|
248
|
|
249 This is only used for *CompoundID* value of --DataFieldsMode option.
|
|
250
|
|
251 --DataFields *"FieldLabel1,FieldLabel2,..."*
|
|
252 Comma delimited list of *SDFiles(s)* data fields to extract and
|
|
253 write to CSV/TSV text file(s) along with generated fingerprints for
|
|
254 *text | all* values of --output option.
|
|
255
|
|
256 This is only used for *Specify* value of --DataFieldsMode option.
|
|
257
|
|
258 Examples:
|
|
259
|
|
260 Extreg
|
|
261 MolID,CompoundName
|
|
262
|
|
263 -d, --DataFieldsMode *All | Common | Specify | CompoundID*
|
|
264 Specify how data fields in *SDFile(s)* are transferred to output
|
|
265 CSV/TSV text file(s) along with generated fingerprints for *text |
|
|
266 all* values of --output option: transfer all SD data fields; transfer
|
|
267 SD data fields common to all compounds; extract specified data
|
|
268 fields; generate a compound ID using molname line, a compound
|
|
269 prefix, or a combination of both. Possible values: *All | Common |
|
|
270 Specify | CompoundID*. Default value: *CompoundID*.
|
|
271
|
|
272 -f, --Filter *Yes | No*
|
|
273 Specify whether to check and filter compound data in SDFile(s).
|
|
274 Possible values: *Yes or No*. Default value: *Yes*.
|
|
275
|
|
276 By default, compound data is checked before calculating fingerprints
|
|
277 and compounds containing atom data corresponding to non-element
|
|
278 symbols or no atom data are ignored.
|
|
279
|
|
280 --FingerprintsLabel *text*
|
|
281 SD data label or text file column label to use for fingerprints
|
|
282 string in output SD or CSV/TSV text file(s) specified by --output.
|
|
283 Default value: *MACCSKeyFingerprints*.
|
|
284
|
|
285 -h, --help
|
|
286 Print this help message.
|
|
287
|
|
288 -k, --KeepLargestComponent *Yes | No*
|
|
289 Generate fingerprints for only the largest component in molecule.
|
|
290 Possible values: *Yes or No*. Default value: *Yes*.
|
|
291
|
|
292 For molecules containing multiple connected components, fingerprints
|
|
293 can be generated in two different ways: use all connected components
|
|
294 or just the largest connected component. By default, all atoms
|
|
295 except for the largest connected component are deleted before
|
|
296 generation of fingerprints.
|
|
297
|
|
298 -m, --mode *MACCSKeyBits | MACCSKeyCount*
|
|
299 Specify type of MACCS keys [ Ref 45-47 ] fingerprints to generate
|
|
300 for molecules in *SDFile(s)*. Possible values: *MACCSKeyBits,
|
|
301 MACCSKeyCount*. Default value: *MACCSKeyBits*.
|
|
302
|
|
303 For *MACCSKeyBits* value of -m, --mode option, a fingerprint
|
|
304 bit-vector string containing zeros and ones is generated and for
|
|
305 *MACCSKeyCount* value, a fingerprint vector string corresponding to
|
|
306 number of MACCS keys is generated.
|
|
307
|
|
308 *MACCSKeyBits | MACCSKeyCount* values for -m, --mode option along
|
|
309 with two possible *166 | 322* values of -s, --size support
|
|
310 generation of four different types of MACCS keys fingerprint:
|
|
311 *MACCS166KeyBits, MACCS166KeyCount, MACCS322KeyBits,
|
|
312 MACCS322KeyCount*.
|
|
313
|
|
314 Definition of MACCS keys uses the following atom and bond symbols to
|
|
315 define atom and bond environments:
|
|
316
|
|
317 Atom symbols for 166 keys [ Ref 47 ]:
|
|
318
|
|
319 A : Any valid periodic table element symbol
|
|
320 Q : Hetero atoms; any non-C, non-H atom
|
|
321 X : Halogens; F, Cl, Br, I
|
|
322 Z : Others; other than H, C, N, O, Si, P, S, F, Cl, Br, I
|
|
323
|
|
324 Atom symbols for 322 keys [ Ref 46 ]:
|
|
325
|
|
326 A : Any valid periodic table element symbol
|
|
327 Q : Hetero atoms; any non-C, non-H atom
|
|
328 X : Others; other than H, C, N, O, Si, P, S, F, Cl, Br, I
|
|
329 Z is neither defined nor used
|
|
330
|
|
331 Bond types:
|
|
332
|
|
333 - : Single
|
|
334 = : Double
|
|
335 T : Triple
|
|
336 # : Triple
|
|
337 ~ : Single or double query bond
|
|
338 % : An aromatic query bond
|
|
339
|
|
340 None : Any bond type; no explicit bond specified
|
|
341
|
|
342 $ : Ring bond; $ before a bond type specifies ring bond
|
|
343 ! : Chain or non-ring bond; ! before a bond type specifies chain bond
|
|
344
|
|
345 @ : A ring linkage and the number following it specifies the
|
|
346 atoms position in the line, thus @1 means linked back to the first
|
|
347 atom in the list.
|
|
348
|
|
349 Aromatic: Kekule or Arom5
|
|
350
|
|
351 Kekule: Bonds in 6-membered rings with alternate single/double bonds
|
|
352 or perimeter bonds
|
|
353 Arom5: Bonds in 5-membered rings with two double bonds and a hetero
|
|
354 atom at the apex of the ring.
|
|
355
|
|
356 MACCS 166 keys [ Ref 45-47 ] are defined as follows:
|
|
357
|
|
358 Key Description
|
|
359
|
|
360 1 ISOTOPE
|
|
361 2 103 < ATOMIC NO. < 256
|
|
362 3 GROUP IVA,VA,VIA PERIODS 4-6 (Ge...)
|
|
363 4 ACTINIDE
|
|
364 5 GROUP IIIB,IVB (Sc...)
|
|
365 6 LANTHANIDE
|
|
366 7 GROUP VB,VIB,VIIB (V...)
|
|
367 8 QAAA@1
|
|
368 9 GROUP VIII (Fe...)
|
|
369 10 GROUP IIA (ALKALINE EARTH)
|
|
370 11 4M RING
|
|
371 12 GROUP IB,IIB (Cu...)
|
|
372 13 ON(C)C
|
|
373 14 S-S
|
|
374 15 OC(O)O
|
|
375 16 QAA@1
|
|
376 17 CTC
|
|
377 18 GROUP IIIA (B...)
|
|
378 19 7M RING
|
|
379 20 SI
|
|
380 21 C=C(Q)Q
|
|
381 22 3M RING
|
|
382 23 NC(O)O
|
|
383 24 N-O
|
|
384 25 NC(N)N
|
|
385 26 C$=C($A)$A
|
|
386 27 I
|
|
387 28 QCH2Q
|
|
388 29 P
|
|
389 30 CQ(C)(C)A
|
|
390 31 QX
|
|
391 32 CSN
|
|
392 33 NS
|
|
393 34 CH2=A
|
|
394 35 GROUP IA (ALKALI METAL)
|
|
395 36 S HETEROCYCLE
|
|
396 37 NC(O)N
|
|
397 38 NC(C)N
|
|
398 39 OS(O)O
|
|
399 40 S-O
|
|
400 41 CTN
|
|
401 42 F
|
|
402 43 QHAQH
|
|
403 44 OTHER
|
|
404 45 C=CN
|
|
405 46 BR
|
|
406 47 SAN
|
|
407 48 OQ(O)O
|
|
408 49 CHARGE
|
|
409 50 C=C(C)C
|
|
410 51 CSO
|
|
411 52 NN
|
|
412 53 QHAAAQH
|
|
413 54 QHAAQH
|
|
414 55 OSO
|
|
415 56 ON(O)C
|
|
416 57 O HETEROCYCLE
|
|
417 58 QSQ
|
|
418 59 Snot%A%A
|
|
419 60 S=O
|
|
420 61 AS(A)A
|
|
421 62 A$A!A$A
|
|
422 63 N=O
|
|
423 64 A$A!S
|
|
424 65 C%N
|
|
425 66 CC(C)(C)A
|
|
426 67 QS
|
|
427 68 QHQH (&...)
|
|
428 69 QQH
|
|
429 70 QNQ
|
|
430 71 NO
|
|
431 72 OAAO
|
|
432 73 S=A
|
|
433 74 CH3ACH3
|
|
434 75 A!N$A
|
|
435 76 C=C(A)A
|
|
436 77 NAN
|
|
437 78 C=N
|
|
438 79 NAAN
|
|
439 80 NAAAN
|
|
440 81 SA(A)A
|
|
441 82 ACH2QH
|
|
442 83 QAAAA@1
|
|
443 84 NH2
|
|
444 85 CN(C)C
|
|
445 86 CH2QCH2
|
|
446 87 X!A$A
|
|
447 88 S
|
|
448 89 OAAAO
|
|
449 90 QHAACH2A
|
|
450 91 QHAAACH2A
|
|
451 92 OC(N)C
|
|
452 93 QCH3
|
|
453 94 QN
|
|
454 95 NAAO
|
|
455 96 5M RING
|
|
456 97 NAAAO
|
|
457 98 QAAAAA@1
|
|
458 99 C=C
|
|
459 100 ACH2N
|
|
460 101 8M RING
|
|
461 102 QO
|
|
462 103 CL
|
|
463 104 QHACH2A
|
|
464 105 A$A($A)$A
|
|
465 106 QA(Q)Q
|
|
466 107 XA(A)A
|
|
467 108 CH3AAACH2A
|
|
468 109 ACH2O
|
|
469 110 NCO
|
|
470 111 NACH2A
|
|
471 112 AA(A)(A)A
|
|
472 113 Onot%A%A
|
|
473 114 CH3CH2A
|
|
474 115 CH3ACH2A
|
|
475 116 CH3AACH2A
|
|
476 117 NAO
|
|
477 118 ACH2CH2A > 1
|
|
478 119 N=A
|
|
479 120 HETEROCYCLIC ATOM > 1 (&...)
|
|
480 121 N HETEROCYCLE
|
|
481 122 AN(A)A
|
|
482 123 OCO
|
|
483 124 QQ
|
|
484 125 AROMATIC RING > 1
|
|
485 126 A!O!A
|
|
486 127 A$A!O > 1 (&...)
|
|
487 128 ACH2AAACH2A
|
|
488 129 ACH2AACH2A
|
|
489 130 QQ > 1 (&...)
|
|
490 131 QH > 1
|
|
491 132 OACH2A
|
|
492 133 A$A!N
|
|
493 134 X (HALOGEN)
|
|
494 135 Nnot%A%A
|
|
495 136 O=A > 1
|
|
496 137 HETEROCYCLE
|
|
497 138 QCH2A > 1 (&...)
|
|
498 139 OH
|
|
499 140 O > 3 (&...)
|
|
500 141 CH3 > 2 (&...)
|
|
501 142 N > 1
|
|
502 143 A$A!O
|
|
503 144 Anot%A%Anot%A
|
|
504 145 6M RING > 1
|
|
505 146 O > 2
|
|
506 147 ACH2CH2A
|
|
507 148 AQ(A)A
|
|
508 149 CH3 > 1
|
|
509 150 A!A$A!A
|
|
510 151 NH
|
|
511 152 OC(C)C
|
|
512 153 QCH2A
|
|
513 154 C=O
|
|
514 155 A!CH2!A
|
|
515 156 NA(A)A
|
|
516 157 C-O
|
|
517 158 C-N
|
|
518 159 O > 1
|
|
519 160 CH3
|
|
520 161 N
|
|
521 162 AROMATIC
|
|
522 163 6M RING
|
|
523 164 O
|
|
524 165 RING
|
|
525 166 FRAGMENTS
|
|
526
|
|
527 MACCS 322 keys set as defined in tables 1, 2 and 3 [ Ref 46 ]
|
|
528 include:
|
|
529
|
|
530 . 26 atom properties of type P, as listed in Table 1
|
|
531 . 32 one-atom environments, as listed in Table 3
|
|
532 . 264 atom-bond-atom combinations listed in Table 4
|
|
533
|
|
534 Total number of keys in three tables is : 322
|
|
535
|
|
536 Atom symbol, X, used for 322 keys [ Ref 46 ] doesn't refer to
|
|
537 Halogens as it does for 166 keys. In order to keep the definition of
|
|
538 322 keys consistent with the published definitions, the symbol X is
|
|
539 used to imply "others" atoms, but it's internally mapped to symbol Z
|
|
540 as defined for 166 keys during the generation of key values.
|
|
541
|
|
542 Atom properties-based keys (26):
|
|
543
|
|
544 Key Description
|
|
545 1 A(AAA) or AA(A)A - atom with at least three neighbors
|
|
546 2 Q - heteroatom
|
|
547 3 Anot%not-A - atom involved in one or more multiple bonds, not aromatic
|
|
548 4 A(AAAA) or AA(A)(A)A - atom with at least four neighbors
|
|
549 5 A(QQ) or QA(Q) - atom with at least two heteroatom neighbors
|
|
550 6 A(QQQ) or QA(Q)Q - atom with at least three heteroatom neighbors
|
|
551 7 QH - heteroatom with at least one hydrogen attached
|
|
552 8 CH2(AA) or ACH2A - carbon with at least two single bonds and at least
|
|
553 two hydrogens attached
|
|
554 9 CH3(A) or ACH3 - carbon with at least one single bond and at least three
|
|
555 hydrogens attached
|
|
556 10 Halogen
|
|
557 11 A(-A-A-A) or A-A(-A)-A - atom has at least three single bonds
|
|
558 12 AAAAAA@1 > 2 - atom is in at least two different six-membered rings
|
|
559 13 A($A$A$A) or A$A($A)$A - atom has more than two ring bonds
|
|
560 14 A$A!A$A - atom is at a ring/chain boundary. When a comparison is done
|
|
561 with another atom the path passes through the chain bond.
|
|
562 15 Anot%A%Anot%A - atom is at an aromatic/nonaromatic boundary. When a
|
|
563 comparison is done with another atom the path
|
|
564 passes through the aromatic bond.
|
|
565 16 A!A!A - atom with more than one chain bond
|
|
566 17 A!A$A!A - atom is at a ring/chain boundary. When a comparison is done
|
|
567 with another atom the path passes through the ring bond.
|
|
568 18 A%Anot%A%A - atom is at an aromatic/nonaromatic boundary. When a
|
|
569 comparison is done with another atom the
|
|
570 path passes through the nonaromatic bond.
|
|
571 19 HETEROCYCLE - atom is a heteroatom in a ring.
|
|
572 20 rare properties: atom with five or more neighbors, atom in
|
|
573 four or more rings, or atom types other than
|
|
574 H, C, N, O, S, F, Cl, Br, or I
|
|
575 21 rare properties: atom has a charge, is an isotope, has two or
|
|
576 more multiple bonds, or has a triple bond.
|
|
577 22 N - nitrogen
|
|
578 23 S - sulfur
|
|
579 24 O - oxygen
|
|
580 25 A(AA)A(A)A(AA) - atom has two neighbors, each with three or
|
|
581 more neighbors (including the central atom).
|
|
582 26 CHACH2 - atom has two hydrocarbon (CH2) neighbors
|
|
583
|
|
584 Atomic environments properties-based keys (32):
|
|
585
|
|
586 Key Description
|
|
587 27 C(CC)
|
|
588 28 C(CCC)
|
|
589 29 C(CN)
|
|
590 30 C(CCN)
|
|
591 31 C(NN)
|
|
592 32 C(NNC)
|
|
593 33 C(NNN)
|
|
594 34 C(CO)
|
|
595 35 C(CCO)
|
|
596 36 C(NO)
|
|
597 37 C(NCO)
|
|
598 38 C(NNO)
|
|
599 39 C(OO)
|
|
600 40 C(COO)
|
|
601 41 C(NOO)
|
|
602 42 C(OOO)
|
|
603 43 Q(CC)
|
|
604 44 Q(CCC)
|
|
605 45 Q(CN)
|
|
606 46 Q(CCN)
|
|
607 47 Q(NN)
|
|
608 48 Q(CNN)
|
|
609 49 Q(NNN)
|
|
610 50 Q(CO)
|
|
611 51 Q(CCO)
|
|
612 52 Q(NO)
|
|
613 53 Q(CNO)
|
|
614 54 Q(NNO)
|
|
615 55 Q(OO)
|
|
616 56 Q(COO)
|
|
617 57 Q(NOO)
|
|
618 58 Q(OOO)
|
|
619
|
|
620 Note: The first symbol is the central atom, with atoms bonded to the
|
|
621 central atom listed in parentheses. Q is any non-C, non-H atom. If
|
|
622 only two atoms are in parentheses, there is no implication
|
|
623 concerning the other atoms bonded to the central atom.
|
|
624
|
|
625 Atom-Bond-Atom properties-based keys: (264)
|
|
626
|
|
627 Key Description
|
|
628 59 C-C
|
|
629 60 C-N
|
|
630 61 C-O
|
|
631 62 C-S
|
|
632 63 C-Cl
|
|
633 64 C-P
|
|
634 65 C-F
|
|
635 66 C-Br
|
|
636 67 C-Si
|
|
637 68 C-I
|
|
638 69 C-X
|
|
639 70 N-N
|
|
640 71 N-O
|
|
641 72 N-S
|
|
642 73 N-Cl
|
|
643 74 N-P
|
|
644 75 N-F
|
|
645 76 N-Br
|
|
646 77 N-Si
|
|
647 78 N-I
|
|
648 79 N-X
|
|
649 80 O-O
|
|
650 81 O-S
|
|
651 82 O-Cl
|
|
652 83 O-P
|
|
653 84 O-F
|
|
654 85 O-Br
|
|
655 86 O-Si
|
|
656 87 O-I
|
|
657 88 O-X
|
|
658 89 S-S
|
|
659 90 S-Cl
|
|
660 91 S-P
|
|
661 92 S-F
|
|
662 93 S-Br
|
|
663 94 S-Si
|
|
664 95 S-I
|
|
665 96 S-X
|
|
666 97 Cl-Cl
|
|
667 98 Cl-P
|
|
668 99 Cl-F
|
|
669 100 Cl-Br
|
|
670 101 Cl-Si
|
|
671 102 Cl-I
|
|
672 103 Cl-X
|
|
673 104 P-P
|
|
674 105 P-F
|
|
675 106 P-Br
|
|
676 107 P-Si
|
|
677 108 P-I
|
|
678 109 P-X
|
|
679 110 F-F
|
|
680 111 F-Br
|
|
681 112 F-Si
|
|
682 113 F-I
|
|
683 114 F-X
|
|
684 115 Br-Br
|
|
685 116 Br-Si
|
|
686 117 Br-I
|
|
687 118 Br-X
|
|
688 119 Si-Si
|
|
689 120 Si-I
|
|
690 121 Si-X
|
|
691 122 I-I
|
|
692 123 I-X
|
|
693 124 X-X
|
|
694 125 C=C
|
|
695 126 C=N
|
|
696 127 C=O
|
|
697 128 C=S
|
|
698 129 C=Cl
|
|
699 130 C=P
|
|
700 131 C=F
|
|
701 132 C=Br
|
|
702 133 C=Si
|
|
703 134 C=I
|
|
704 135 C=X
|
|
705 136 N=N
|
|
706 137 N=O
|
|
707 138 N=S
|
|
708 139 N=Cl
|
|
709 140 N=P
|
|
710 141 N=F
|
|
711 142 N=Br
|
|
712 143 N=Si
|
|
713 144 N=I
|
|
714 145 N=X
|
|
715 146 O=O
|
|
716 147 O=S
|
|
717 148 O=Cl
|
|
718 149 O=P
|
|
719 150 O=F
|
|
720 151 O=Br
|
|
721 152 O=Si
|
|
722 153 O=I
|
|
723 154 O=X
|
|
724 155 S=S
|
|
725 156 S=Cl
|
|
726 157 S=P
|
|
727 158 S=F
|
|
728 159 S=Br
|
|
729 160 S=Si
|
|
730 161 S=I
|
|
731 162 S=X
|
|
732 163 Cl=Cl
|
|
733 164 Cl=P
|
|
734 165 Cl=F
|
|
735 166 Cl=Br
|
|
736 167 Cl=Si
|
|
737 168 Cl=I
|
|
738 169 Cl=X
|
|
739 170 P=P
|
|
740 171 P=F
|
|
741 172 P=Br
|
|
742 173 P=Si
|
|
743 174 P=I
|
|
744 175 P=X
|
|
745 176 F=F
|
|
746 177 F=Br
|
|
747 178 F=Si
|
|
748 179 F=I
|
|
749 180 F=X
|
|
750 181 Br=Br
|
|
751 182 Br=Si
|
|
752 183 Br=I
|
|
753 184 Br=X
|
|
754 185 Si=Si
|
|
755 186 Si=I
|
|
756 187 Si=X
|
|
757 188 I=I
|
|
758 189 I=X
|
|
759 190 X=X
|
|
760 191 C#C
|
|
761 192 C#N
|
|
762 193 C#O
|
|
763 194 C#S
|
|
764 195 C#Cl
|
|
765 196 C#P
|
|
766 197 C#F
|
|
767 198 C#Br
|
|
768 199 C#Si
|
|
769 200 C#I
|
|
770 201 C#X
|
|
771 202 N#N
|
|
772 203 N#O
|
|
773 204 N#S
|
|
774 205 N#Cl
|
|
775 206 N#P
|
|
776 207 N#F
|
|
777 208 N#Br
|
|
778 209 N#Si
|
|
779 210 N#I
|
|
780 211 N#X
|
|
781 212 O#O
|
|
782 213 O#S
|
|
783 214 O#Cl
|
|
784 215 O#P
|
|
785 216 O#F
|
|
786 217 O#Br
|
|
787 218 O#Si
|
|
788 219 O#I
|
|
789 220 O#X
|
|
790 221 S#S
|
|
791 222 S#Cl
|
|
792 223 S#P
|
|
793 224 S#F
|
|
794 225 S#Br
|
|
795 226 S#Si
|
|
796 227 S#I
|
|
797 228 S#X
|
|
798 229 Cl#Cl
|
|
799 230 Cl#P
|
|
800 231 Cl#F
|
|
801 232 Cl#Br
|
|
802 233 Cl#Si
|
|
803 234 Cl#I
|
|
804 235 Cl#X
|
|
805 236 P#P
|
|
806 237 P#F
|
|
807 238 P#Br
|
|
808 239 P#Si
|
|
809 240 P#I
|
|
810 241 P#X
|
|
811 242 F#F
|
|
812 243 F#Br
|
|
813 244 F#Si
|
|
814 245 F#I
|
|
815 246 F#X
|
|
816 247 Br#Br
|
|
817 248 Br#Si
|
|
818 249 Br#I
|
|
819 250 Br#X
|
|
820 251 Si#Si
|
|
821 252 Si#I
|
|
822 253 Si#X
|
|
823 254 I#I
|
|
824 255 I#X
|
|
825 256 X#X
|
|
826 257 C$C
|
|
827 258 C$N
|
|
828 259 C$O
|
|
829 260 C$S
|
|
830 261 C$Cl
|
|
831 262 C$P
|
|
832 263 C$F
|
|
833 264 C$Br
|
|
834 265 C$Si
|
|
835 266 C$I
|
|
836 267 C$X
|
|
837 268 N$N
|
|
838 269 N$O
|
|
839 270 N$S
|
|
840 271 N$Cl
|
|
841 272 N$P
|
|
842 273 N$F
|
|
843 274 N$Br
|
|
844 275 N$Si
|
|
845 276 N$I
|
|
846 277 N$X
|
|
847 278 O$O
|
|
848 279 O$S
|
|
849 280 O$Cl
|
|
850 281 O$P
|
|
851 282 O$F
|
|
852 283 O$Br
|
|
853 284 O$Si
|
|
854 285 O$I
|
|
855 286 O$X
|
|
856 287 S$S
|
|
857 288 S$Cl
|
|
858 289 S$P
|
|
859 290 S$F
|
|
860 291 S$Br
|
|
861 292 S$Si
|
|
862 293 S$I
|
|
863 294 S$X
|
|
864 295 Cl$Cl
|
|
865 296 Cl$P
|
|
866 297 Cl$F
|
|
867 298 Cl$Br
|
|
868 299 Cl$Si
|
|
869 300 Cl$I
|
|
870 301 Cl$X
|
|
871 302 P$P
|
|
872 303 P$F
|
|
873 304 P$Br
|
|
874 305 P$Si
|
|
875 306 P$I
|
|
876 307 P$X
|
|
877 308 F$F
|
|
878 309 F$Br
|
|
879 310 F$Si
|
|
880 311 F$I
|
|
881 312 F$X
|
|
882 313 Br$Br
|
|
883 314 Br$Si
|
|
884 315 Br$I
|
|
885 316 Br$X
|
|
886 317 Si$Si
|
|
887 318 Si$I
|
|
888 319 Si$X
|
|
889 320 I$I
|
|
890 321 I$X
|
|
891 322 X$X
|
|
892
|
|
893 --OutDelim *comma | tab | semicolon*
|
|
894 Delimiter for output CSV/TSV text file(s). Possible values: *comma,
|
|
895 tab, or semicolon*. Default value: *comma*.
|
|
896
|
|
897 --output *SD | FP | text | all*
|
|
898 Type of output files to generate. Possible values: *SD, FP, text, or
|
|
899 all*. Default value: *text*.
|
|
900
|
|
901 -o, --overwrite
|
|
902 Overwrite existing files.
|
|
903
|
|
904 -q, --quote *Yes | No*
|
|
905 Put quote around column values in output CSV/TSV text file(s).
|
|
906 Possible values: *Yes or No*. Default value: *Yes*.
|
|
907
|
|
908 -r, --root *RootName*
|
|
909 New file name is generated using the root: <Root>.<Ext>. Default for
|
|
910 new file names: <SDFileName><MACCSKeysFP>.<Ext>. The file type
|
|
911 determines <Ext> value. The sdf, fpf, csv, and tsv <Ext> values are
|
|
912 used for SD, FP, comma/semicolon, and tab delimited text files,
|
|
913 respectively. This option is ignored for multiple input files.
|
|
914
|
|
915 -s, --size *number*
|
|
916 Size of MACCS keys [ Ref 45-47 ] set to use during fingerprints
|
|
917 generation. Possible values: *166 or 322*. Default value: *166*.
|
|
918
|
|
919 -v, --VectorStringFormat *ValuesString | IDsAndValuesString |
|
|
920 IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString*
|
|
921 Format of fingerprints vector string data in output SD, FP or
|
|
922 CSV/TSV text file(s) specified by --output used during
|
|
923 *MACCSKeyCount* value of -m, --mode option. Possible values:
|
|
924 *ValuesString | IDsAndValuesString | IDsAndValuesPairsString |
|
|
925 ValuesAndIDsString | ValuesAndIDsPairsString*. Default value:
|
|
926 *ValuesString*.
|
|
927
|
|
928 Examples:
|
|
929
|
|
930 FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri
|
|
931 ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
|
932 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0
|
|
933 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0
|
|
934 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1
|
|
935 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1
|
|
936
|
|
937 FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri
|
|
938 ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0
|
|
939 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0
|
|
940 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
|
941 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0
|
|
942 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
|
|
943
|
|
944 -w, --WorkingDir *DirName*
|
|
945 Location of working directory. Default: current directory.
|
|
946
|
|
947 EXAMPLES
|
|
948 To generate MACCS keys fingerprints of size 166 in binary bit-vector
|
|
949 string format and create a SampleMACCS166FPBin.csv file containing
|
|
950 sequential compound IDs along with fingerprints bit-vector strings data,
|
|
951 type:
|
|
952
|
|
953 % MACCSKeysFingerprints.pl -r SampleMACCS166FPBin -o Sample.sdf
|
|
954
|
|
955 To generate MACCS keys fingerprints of size 166 in binary bit-vector
|
|
956 string format and create SampleMACCS166FPBin.sdf,
|
|
957 SampleMACCS166FPBin.fpf and SampleMACCS166FPBin.csv files containing
|
|
958 sequential compound IDs in CSV file along with fingerprints bit-vector
|
|
959 strings data, type:
|
|
960
|
|
961 % MACCSKeysFingerprints.pl --output all -r SampleMACCS166FPBin
|
|
962 -o Sample.sdf
|
|
963
|
|
964 To generate MACCS keys fingerprints of size 322 in binary bit-vector
|
|
965 string format and create a SampleMACCS322FPBin.csv file containing
|
|
966 sequential compound IDs along with fingerprints bit-vector strings data,
|
|
967 type:
|
|
968
|
|
969 % MACCSKeysFingerprints.pl --size 322 -r SampleMACCS322FPBin -o Sample.sdf
|
|
970
|
|
971 To generate MACCS keys fingerprints of size 166 corresponding to count
|
|
972 of keys in ValuesString format and create a SampleMACCS166FPCount.csv
|
|
973 file containing sequential compound IDs along with fingerprints vector
|
|
974 strings data, type:
|
|
975
|
|
976 % MACCSKeysFingerprints.pl -m MACCSKeyCount -r SampleMACCS166FPCount
|
|
977 -o Sample.sdf
|
|
978
|
|
979 To generate MACCS keys fingerprints of size 322 corresponding to count
|
|
980 of keys in ValuesString format and create a SampleMACCS322FPCount.csv
|
|
981 file containing sequential compound IDs along with fingerprints vector
|
|
982 strings data, type:
|
|
983
|
|
984 % MACCSKeysFingerprints.pl -m MACCSKeyCount --size 322
|
|
985 -r SampleMACCS322FPCount -o Sample.sdf
|
|
986
|
|
987 To generate MACCS keys fingerprints of size 166 in hexadecimal
|
|
988 bit-vector string format with ascending bits order and create a
|
|
989 SampleMACCS166FPHex.csv file containing compound IDs from MolName along
|
|
990 with fingerprints bit-vector strings data, type:
|
|
991
|
|
992 % MACCSKeysFingerprints.pl -m MACCSKeyBits --size 166 --BitStringFormat
|
|
993 HexadecimalString --BitsOrder Ascending --DataFieldsMode CompoundID
|
|
994 --CompoundIDMode MolName -r SampleMACCS166FPHex -o Sample.sdf
|
|
995
|
|
996 To generate MACCS keys fingerprints of size 166 corresponding to count
|
|
997 of keys in IDsAndValuesString format and create a
|
|
998 SampleMACCS166FPCount.csv file containing compound IDs from MolName line
|
|
999 along with fingerprints vector strings data, type:
|
|
1000
|
|
1001 % MACCSKeysFingerprints.pl -m MACCSKeyCount --size 166
|
|
1002 --VectorStringFormat IDsAndValuesString --DataFieldsMode CompoundID
|
|
1003 --CompoundIDMode MolName -r SampleMACCS166FPCount -o Sample.sdf
|
|
1004
|
|
1005 To generate MACCS keys fingerprints of size 166 corresponding to count
|
|
1006 of keys in IDsAndValuesString format and create a
|
|
1007 SampleMACCS166FPCount.csv file containing compound IDs using specified
|
|
1008 data field along with fingerprints vector strings data, type:
|
|
1009
|
|
1010 % MACCSKeysFingerprints.pl -m MACCSKeyCount --size 166
|
|
1011 --VectorStringFormat IDsAndValuesString --DataFieldsMode CompoundID
|
|
1012 --CompoundIDMode DataField --CompoundID Mol_ID -r
|
|
1013 SampleMACCS166FPCount -o Sample.sdf
|
|
1014
|
|
1015 To generate MACCS keys fingerprints of size 322 corresponding to count
|
|
1016 of keys in ValuesString format and create a SampleMACCS322FPCount.tsv
|
|
1017 file containing compound IDs derived from combination of molecule name
|
|
1018 line and an explicit compound prefix along with fingerprints vector
|
|
1019 strings data in a column labeled MACCSKeyCountFP, type:
|
|
1020
|
|
1021 % MACCSKeysFingerprints.pl -m MACCSKeyCount --size 322 --DataFieldsMode
|
|
1022 CompoundID --CompoundIDMode MolNameOrLabelPrefix --CompoundID Cmpd
|
|
1023 --CompoundIDLabel MolID --FingerprintsLabel MACCSKeyCountFP --OutDelim
|
|
1024 Tab -r SampleMACCS322FPCount -o Sample.sdf
|
|
1025
|
|
1026 To generate MACCS keys fingerprints of size 166 corresponding to count
|
|
1027 of keys in ValuesString format and create a SampleMACCS166FPCount.csv
|
|
1028 file containing specific data fields columns along with fingerprints
|
|
1029 vector strings data, type:
|
|
1030
|
|
1031 % MACCSKeysFingerprints.pl -m MACCSKeyCount --size 166
|
|
1032 --VectorStringFormat ValuesString --DataFieldsMode Specify --DataFields
|
|
1033 Mol_ID -r SampleMACCS166FPCount -o Sample.sdf
|
|
1034
|
|
1035 To generate MACCS keys fingerprints of size 322 corresponding to count
|
|
1036 of keys in ValuesString format and create a SampleMACCS322FPCount.csv
|
|
1037 file containing common data fields columns along with fingerprints
|
|
1038 vector strings data, type:
|
|
1039
|
|
1040 % MACCSKeysFingerprints.pl -m MACCSKeyCount --size 322
|
|
1041 --VectorStringFormat ValuesString --DataFieldsMode Common -r
|
|
1042 SampleMACCS322FPCount -o Sample.sdf
|
|
1043
|
|
1044 To generate MACCS keys fingerprints of size 166 corresponding to count
|
|
1045 of keys in ValuesString format and create SampleMACCS166FPCount.sdf,
|
|
1046 SampleMACCS166FPCount.fpf and SampleMACCS166FPCount.csv files containing
|
|
1047 all data fields columns in CSV file along with fingerprints vector
|
|
1048 strings data, type:
|
|
1049
|
|
1050 % MACCSKeysFingerprints.pl -m MACCSKeyCount --size 166 --output all
|
|
1051 --VectorStringFormat ValuesString --DataFieldsMode All -r
|
|
1052 SampleMACCS166FPCount -o Sample.sdf
|
|
1053
|
|
1054 AUTHOR
|
|
1055 Manish Sud <msud@san.rr.com>
|
|
1056
|
|
1057 SEE ALSO
|
|
1058 InfoFingerprintsFiles.pl, SimilarityMatricesFingerprints.pl,
|
|
1059 AtomNeighborhoodsFingerprints.pl, ExtendedConnectivityFingerprints.pl,
|
|
1060 PathLengthFingerprints.pl, TopologicalAtomPairsFingerprints.pl,
|
|
1061 TopologicalAtomTorsionsFingerprints.pl,
|
|
1062 TopologicalPharmacophoreAtomPairsFingerprints.pl,
|
|
1063 TopologicalPharmacophoreAtomTripletsFingerprints.pl
|
|
1064
|
|
1065 COPYRIGHT
|
|
1066 Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
1067
|
|
1068 This file is part of MayaChemTools.
|
|
1069
|
|
1070 MayaChemTools is free software; you can redistribute it and/or modify it
|
|
1071 under the terms of the GNU Lesser General Public License as published by
|
|
1072 the Free Software Foundation; either version 3 of the License, or (at
|
|
1073 your option) any later version.
|
|
1074
|