0
|
1 NAME
|
|
2 PathLengthFingerprints.pl - Generate atom path length based fingerprints
|
|
3 for SD files
|
|
4
|
|
5 SYNOPSIS
|
|
6 PathLengthFingerprints.pl SDFile(s)...
|
|
7
|
|
8 PathLengthFingerprints.pl [--AromaticityModel *AromaticityModelType*]
|
|
9 [-a, --AtomIdentifierType *AtomicInvariantsAtomTypes*]
|
|
10 [--AtomicInvariantsToUse *"AtomicInvariant1,AtomicInvariant2..."*]
|
|
11 [--FunctionalClassesToUse *"FunctionalClass1,FunctionalClass2..."*]
|
|
12 [--BitsOrder *Ascending | Descending*] [-b, --BitStringFormat
|
|
13 *BinaryString | HexadecimalString*] [--CompoundID *DataFieldName or
|
|
14 LabelPrefixString*] [--CompoundIDLabel *text*] [--CompoundIDMode
|
|
15 *DataField | MolName | LabelPrefix | MolNameOrLabelPrefix*]
|
|
16 [--DataFields *"FieldLabel1,FieldLabel2,... "*] [-d, --DataFieldsMode
|
|
17 *All | Common | Specify | CompoundID*] [--DetectAromaticity *Yes | No*]
|
|
18 [-f, --Filter *Yes | No*] [--FingerprintsLabel *text*] [--fold *Yes |
|
|
19 No*] [--FoldedSize *number*] [-h, --help] [-i, --IgnoreHydrogens *Yes |
|
|
20 No*] [-k, --KeepLargestComponent *Yes | No*] [-m, --mode *PathLengthBits
|
|
21 | PathLengthCount*] [--MinPathLength *number*] [--MaxPathLength
|
|
22 *number*] [-n, --NumOfBitsToSetPerPath *number*] [--OutDelim *comma |
|
|
23 tab | semicolon*] [--output *SD | FP | text | all*] [-q, --quote *Yes |
|
|
24 No*] [-r, --root *RootName*] [-p, --PathMode *AtomPathsWithoutRings |
|
|
25 AtomPathsWithRings | AllAtomPathsWithoutRings | AllAtomPathsWithRings*]
|
|
26 [-s, --size *number*] [-u, --UseBondSymbols *Yes | No*]
|
|
27 [--UsePerlCoreRandom *Yes | No*] [--UseUniquePaths *Yes | No*] [-q,
|
|
28 --quote *Yes | No*] [-r, --root *RootName*] [-v, --VectorStringFormat
|
|
29 *IDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString |
|
|
30 ValuesAndIDsPairsString*] [-w, --WorkingDir dirname] SDFile(s)...
|
|
31
|
|
32 DESCRIPTION
|
|
33 Generate atom path length fingerprints for *SDFile(s)* and create
|
|
34 appropriate SD, FP or CSV/TSV text file(s) containing fingerprints
|
|
35 bit-vector or vector strings corresponding to molecular fingerprints.
|
|
36
|
|
37 Multiple SDFile names are separated by spaces. The valid file extensions
|
|
38 are *.sdf* and *.sd*. All other file names are ignored. All the SD files
|
|
39 in a current directory can be specified either by **.sdf* or the current
|
|
40 directory name.
|
|
41
|
|
42 The current release of MayaChemTools supports generation of path length
|
|
43 fingerprints corresponding to following -a, --AtomIdentifierTypes:
|
|
44
|
|
45 AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
|
|
46 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
|
|
47 SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
|
|
48
|
|
49 Based on the values specified for -p, --PathMode, --MinPathLength and
|
|
50 --MaxPathLength, all appropriate atom paths are generated for each atom
|
|
51 in the molecule and collected in a list and the list is filtered to
|
|
52 remove any structurally duplicate paths as indicated by the value of
|
|
53 --UseUniquePaths option.
|
|
54
|
|
55 For each atom path in the filtered atom paths list, an atom path string
|
|
56 is created using value of -a, --AtomIdentifierType and specified values
|
|
57 to use for a particular atom identifier type. Value of -u,
|
|
58 --UseBondSymbols controls whether bond order symbols are used during
|
|
59 generation of atom path string. For each atom path, only
|
|
60 lexicographically smaller atom path strings are kept.
|
|
61
|
|
62 For *PathLengthBits* value of -m, --mode option, each atom path is
|
|
63 hashed to a 32 bit unsigned integer key using TextUtil::HashCode
|
|
64 function. Using the hash key as a seed for a random number generator, a
|
|
65 random integer value between 0 and --Size is used to set corresponding
|
|
66 bits in the fingerprint bit-vector string. Value of
|
|
67 --NumOfBitsToSetPerPath option controls the number of times a random
|
|
68 number is generated to set corresponding bits.
|
|
69
|
|
70 For *PathLengthCount* value of -m, --mode option, the number of times
|
|
71 an atom path appears is tracked and a fingerprints count-string
|
|
72 corresponding to count of atom paths is generated.
|
|
73
|
|
74 Example of *SD* file containing path length fingerprints string data:
|
|
75
|
|
76 ... ...
|
|
77 ... ...
|
|
78 $$$$
|
|
79 ... ...
|
|
80 ... ...
|
|
81 ... ...
|
|
82 41 44 0 0 0 0 0 0 0 0999 V2000
|
|
83 -3.3652 1.4499 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
84 ... ...
|
|
85 2 3 1 0 0 0 0
|
|
86 ... ...
|
|
87 M END
|
|
88 > <CmpdID>
|
|
89 Cmpd1
|
|
90
|
|
91 > <PathLengthFingerprints>
|
|
92 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLengt
|
|
93 h1:MaxLength8;1024;HexadecimalString;Ascending;9c8460989ec8a49913991a66
|
|
94 03130b0a19e8051c89184414953800cc2151082844a201042800130860308e8204d4028
|
|
95 00831048940e44281c00060449a5000ac80c894114e006321264401600846c050164462
|
|
96 08190410805000304a10205b0100e04c0038ba0fad0209c0ca8b1200012268b61c0026a
|
|
97 aa0660a11014a011d46
|
|
98
|
|
99 $$$$
|
|
100 ... ...
|
|
101 ... ...
|
|
102
|
|
103 Example of *FP* file containing path length fingerprints string data:
|
|
104
|
|
105 #
|
|
106 # Package = MayaChemTools 7.4
|
|
107 # ReleaseDate = Oct 21, 2010
|
|
108 #
|
|
109 # TimeStamp = Mon Mar 7 15:14:01 2011
|
|
110 #
|
|
111 # FingerprintsStringType = FingerprintsBitVector
|
|
112 #
|
|
113 # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:...
|
|
114 # Size = 1024
|
|
115 # BitStringFormat = HexadecimalString
|
|
116 # BitsOrder = Ascending
|
|
117 #
|
|
118 Cmpd1 9c8460989ec8a49913991a6603130b0a19e8051c89184414953800cc21510...
|
|
119 Cmpd2 000000249400840040100042011001001980410c000000001010088001120...
|
|
120 ... ...
|
|
121 ... ..
|
|
122
|
|
123 Example of CSV *Text* file containing pathlength fingerprints string
|
|
124 data:
|
|
125
|
|
126 "CompoundID","PathLengthFingerprints"
|
|
127 "Cmpd1","FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes
|
|
128 :MinLength1:MaxLength8;1024;HexadecimalString;Ascending;9c8460989ec8a4
|
|
129 9913991a6603130b0a19e8051c89184414953800cc2151082844a20104280013086030
|
|
130 8e8204d402800831048940e44281c00060449a5000ac80c894114e006321264401..."
|
|
131 ... ...
|
|
132 ... ...
|
|
133
|
|
134 The current release of MayaChemTools generates the following types of
|
|
135 path length fingerprints bit-vector and vector strings:
|
|
136
|
|
137 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng
|
|
138 th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110
|
|
139 0100010101011000101001011100110001000010001001101000001001001001001000
|
|
140 0010110100000111001001000001001010100100100000000011000000101001011100
|
|
141 0010000001000101010100000100111100110111011011011000000010110111001101
|
|
142 0101100011000000010001000011000010100011101100001000001000100000000...
|
|
143
|
|
144 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng
|
|
145 th1:MaxLength8;1024;HexadecimalString;Ascending;48caa1315d82d91122b029
|
|
146 42861c9409a4208182d12015509767bd0867653604481a8b1288000056090583603078
|
|
147 9cedae54e26596889ab121309800900490515224208421502120a0dd9200509723ae89
|
|
148 00024181b86c0122821d4e4880c38620dab280824b455404009f082003d52c212b4e6d
|
|
149 6ea05280140069c780290c43
|
|
150
|
|
151 FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength
|
|
152 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2
|
|
153 C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X
|
|
154 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1
|
|
155 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO
|
|
156 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C....
|
|
157
|
|
158 FingerprintsVector;PathLengthCount:DREIDINGAtomTypes:MinLength1:MaxLen
|
|
159 gth8;410;NumericalValues;IDsAndValuesPairsString;C_2 2 C_3 9 C_R 22 F_
|
|
160 1 N_3 1 N_R 1 O_2 2 O_3 3 C_2=O_2 2 C_2C_3 1 C_2C_R 1 C_2N_3 1 C_2O_3
|
|
161 1 C_3C_3 7 C_3C_R 1 C_3N_R 1 C_3O_3 2 C_R:C_R 21 C_R:N_R 2 C_RC_R 2 C
|
|
162 _RF_ 1 C_RN_3 1 C_2C_3C_3 1 C_2C_R:C_R 2 C_2N_3C_R 1 C_3C_2=O_2 1 C_3C
|
|
163 _2O_3 1 C_3C_3C_3 5 C_3C_3C_R 2 C_3C_3N_R 1 C_3C_3O_3 4 C_3C_R:C_R ...
|
|
164
|
|
165 FingerprintsVector;PathLengthCount:EStateAtomTypes:MinLength1:MaxLengt
|
|
166 h8;454;NumericalValues;IDsAndValuesPairsString;aaCH 14 aasC 8 aasN 1 d
|
|
167 O 2 dssC 2 sCH3 2 sF 1 sOH 3 ssCH2 4 ssNH 1 sssCH 3 aaCH:aaCH 10 aaCH:
|
|
168 aasC 8 aasC:aasC 3 aasC:aasN 2 aasCaasC 2 aasCdssC 1 aasCsF 1 aasCssNH
|
|
169 1 aasCsssCH 1 aasNssCH2 1 dO=dssC 2 dssCsOH 1 dssCssCH2 1 dssCssNH 1
|
|
170 sCH3sssCH 2 sOHsssCH 2 ssCH2ssCH2 1 ssCH2sssCH 4 aaCH:aaCH:aaCH 6 a...
|
|
171
|
|
172 FingerprintsVector;PathLengthCount:FunctionalClassAtomTypes:MinLength1
|
|
173 :MaxLength8;404;NumericalValues;IDsAndValuesPairsString;Ar 22 Ar.HBA 1
|
|
174 HBA 2 HBA.HBD 3 HBD 1 Hal 1 NI 1 None 10 Ar.HBA:Ar 2 Ar.HBANone 1 Ar:
|
|
175 Ar 21 ArAr 2 ArHBD 1 ArHal 1 ArNone 2 HBA.HBDNI 1 HBA.HBDNone 2 HBA=NI
|
|
176 1 HBA=None 1 HBDNone 1 NINone 1 NoneNone 7 Ar.HBA:Ar:Ar 2 Ar.HBA:ArAr
|
|
177 1 Ar.HBA:ArNone 1 Ar.HBANoneNone 1 Ar:Ar.HBA:Ar 1 Ar:Ar.HBANone 2 ...
|
|
178
|
|
179 FingerprintsVector;PathLengthCount:MMFF94AtomTypes:MinLength1:MaxLengt
|
|
180 h8;463;NumericalValues;IDsAndValuesPairsString;C5A 2 C5B 2 C=ON 1 CB 1
|
|
181 8 COO 1 CR 9 F 1 N5 1 NC=O 1 O=CN 1 O=CO 1 OC=O 1 OR 2 C5A:C5B 2 C5A:N
|
|
182 5 2 C5ACB 1 C5ACR 1 C5B:C5B 1 C5BC=ON 1 C5BCB 1 C=ON=O=CN 1 C=ONNC=O 1
|
|
183 CB:CB 18 CBF 1 CBNC=O 1 COO=O=CO 1 COOCR 1 COOOC=O 1 CRCR 7 CRN5 1 CR
|
|
184 OR 2 C5A:C5B:C5B 2 C5A:C5BC=ON 1 C5A:C5BCB 1 C5A:N5:C5A 1 C5A:N5CR ...
|
|
185
|
|
186 FingerprintsVector;PathLengthCount:SLogPAtomTypes:MinLength1:MaxLength
|
|
187 8;518;NumericalValues;IDsAndValuesPairsString;C1 5 C10 1 C11 1 C14 1 C
|
|
188 18 14 C20 4 C21 2 C22 1 C5 2 CS 2 F 1 N11 1 N4 1 O10 1 O2 3 O9 1 C10C1
|
|
189 1 C10N11 1 C11C1 2 C11C21 1 C14:C18 2 C14F 1 C18:C18 10 C18:C20 4 C18
|
|
190 :C22 2 C1C5 1 C1CS 4 C20:C20 1 C20:C21 1 C20:N11 1 C20C20 2 C21:C21 1
|
|
191 C21:N11 1 C21C5 1 C22N4 1 C5=O10 1 C5=O9 1 C5N4 1 C5O2 1 CSO2 2 C10...
|
|
192
|
|
193 FingerprintsVector;PathLengthCount:SYBYLAtomTypes:MinLength1:MaxLength
|
|
194 8;412;NumericalValues;IDsAndValuesPairsString;C.2 2 C.3 9 C.ar 22 F 1
|
|
195 N.am 1 N.ar 1 O.2 1 O.3 2 O.co2 2 C.2=O.2 1 C.2=O.co2 1 C.2C.3 1 C.2C.
|
|
196 ar 1 C.2N.am 1 C.2O.co2 1 C.3C.3 7 C.3C.ar 1 C.3N.ar 1 C.3O.3 2 C.ar:C
|
|
197 .ar 21 C.ar:N.ar 2 C.arC.ar 2 C.arF 1 C.arN.am 1 C.2C.3C.3 1 C.2C.ar:C
|
|
198 .ar 2 C.2N.amC.ar 1 C.3C.2=O.co2 1 C.3C.2O.co2 1 C.3C.3C.3 5 C.3C.3...
|
|
199
|
|
200 FingerprintsVector;PathLengthCount:TPSAAtomTypes:MinLength1:MaxLength8
|
|
201 ;331;NumericalValues;IDsAndValuesPairsString;N21 1 N7 1 None 34 O3 2 O
|
|
202 4 3 N21:None 2 N21None 1 N7None 2 None:None 21 None=O3 2 NoneNone 13 N
|
|
203 oneO4 3 N21:None:None 2 N21:NoneNone 2 N21NoneNone 1 N7None:None 2 N7N
|
|
204 one=O3 1 N7NoneNone 1 None:N21:None 1 None:N21None 2 None:None:None 20
|
|
205 None:NoneNone 12 NoneN7None 1 NoneNone=O3 2 NoneNoneNone 8 NoneNon...
|
|
206
|
|
207 FingerprintsVector;PathLengthCount:UFFAtomTypes:MinLength1:MaxLength8;
|
|
208 410;NumericalValues;IDsAndValuesPairsString;C_2 2 C_3 9 C_R 22 F_ 1 N_
|
|
209 3 1 N_R 1 O_2 2 O_3 3 C_2=O_2 2 C_2C_3 1 C_2C_R 1 C_2N_3 1 C_2O_3 1 C_
|
|
210 3C_3 7 C_3C_R 1 C_3N_R 1 C_3O_3 2 C_R:C_R 21 C_R:N_R 2 C_RC_R 2 C_RF_
|
|
211 1 C_RN_3 1 C_2C_3C_3 1 C_2C_R:C_R 2 C_2N_3C_R 1 C_3C_2=O_2 1 C_3C_2O_3
|
|
212 1 C_3C_3C_3 5 C_3C_3C_R 2 C_3C_3N_R 1 C_3C_3O_3 4 C_3C_R:C_R 1 C_3...
|
|
213
|
|
214 OPTIONS
|
|
215 --AromaticityModel *MDLAromaticityModel | TriposAromaticityModel |
|
|
216 MMFFAromaticityModel | ChemAxonBasicAromaticityModel |
|
|
217 ChemAxonGeneralAromaticityModel | DaylightAromaticityModel |
|
|
218 MayaChemToolsAromaticityModel*
|
|
219 Specify aromaticity model to use during detection of aromaticity.
|
|
220 Possible values in the current release are: *MDLAromaticityModel,
|
|
221 TriposAromaticityModel, MMFFAromaticityModel,
|
|
222 ChemAxonBasicAromaticityModel, ChemAxonGeneralAromaticityModel,
|
|
223 DaylightAromaticityModel or MayaChemToolsAromaticityModel*. Default
|
|
224 value: *MayaChemToolsAromaticityModel*.
|
|
225
|
|
226 The supported aromaticity model names along with model specific
|
|
227 control parameters are defined in AromaticityModelsData.csv, which
|
|
228 is distributed with the current release and is available under
|
|
229 lib/data directory. Molecule.pm module retrieves data from this file
|
|
230 during class instantiation and makes it available to method
|
|
231 DetectAromaticity for detecting aromaticity corresponding to a
|
|
232 specific model.
|
|
233
|
|
234 This option is ignored during *No* value of --DetectAromaticity
|
|
235 option.
|
|
236
|
|
237 -a, --AtomIdentifierType *AtomicInvariantsAtomTypes | DREIDINGAtomTypes
|
|
238 | EStateAtomTypes | FunctionalClassAtomTypes | MMFF94AtomTypes |
|
|
239 SLogPAtomTypes | SYBYLAtomTypes | TPSAAtomTypes | UFFAtomTypes*
|
|
240 Specify atom identifier type to use for assignment of atom types to
|
|
241 hydrogen and/or non-hydrogen atoms during calculation of atom types
|
|
242 fingerprints. Possible values in the current release are:
|
|
243 *AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
|
|
244 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
|
|
245 SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes*. Default value:
|
|
246 *AtomicInvariantsAtomTypes*.
|
|
247
|
|
248 -a, --AtomIdentifierType *AtomicInvariantsAtomTypes | DREIDINGAtomTypes
|
|
249 | EStateAtomTypes | FunctionalClassAtomTypes | MMFF94AtomTypes |
|
|
250 SLogPAtomTypes | SYBYLAtomTypes | TPSAAtomTypes | UFFAtomTypes*
|
|
251 Specify atom identifier type to use during generation of atom path
|
|
252 strings corresponding to path length fingerprints. Possible values
|
|
253 in the current release are: *AtomicInvariantsAtomTypes,
|
|
254 DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
|
|
255 MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes,
|
|
256 UFFAtomTypes*. Default value: *AtomicInvariantsAtomTypes*.
|
|
257
|
|
258 --AtomicInvariantsToUse *"AtomicInvariant1,AtomicInvariant2..."*
|
|
259 This value is used during *AtomicInvariantsAtomTypes* value of -a,
|
|
260 --AtomIdentifierType option. It's a list of comma separated valid
|
|
261 atomic invariant atom types.
|
|
262
|
|
263 Possible values for atomic invariants are: *AS, X, BO, LBO, SB, DB,
|
|
264 TB, H, Ar, RA, FC, MN, SM*. Default value: *AS*.
|
|
265
|
|
266 The atomic invariants abbreviations correspond to:
|
|
267
|
|
268 AS = Atom symbol corresponding to element symbol
|
|
269
|
|
270 X<n> = Number of non-hydrogen atom neighbors or heavy atoms
|
|
271 BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms
|
|
272 LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms
|
|
273 SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms
|
|
274 DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms
|
|
275 TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms
|
|
276 H<n> = Number of implicit and explicit hydrogens for atom
|
|
277 Ar = Aromatic annotation indicating whether atom is aromatic
|
|
278 RA = Ring atom annotation indicating whether atom is a ring
|
|
279 FC<+n/-n> = Formal charge assigned to atom
|
|
280 MN<n> = Mass number indicating isotope other than most abundant isotope
|
|
281 SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or
|
|
282 3 (triplet)
|
|
283
|
|
284 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class
|
|
285 corresponds to:
|
|
286
|
|
287 AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
|
|
288
|
|
289 Except for AS which is a required atomic invariant in atom types,
|
|
290 all other atomic invariants are optional. Atom type specification
|
|
291 doesn't include atomic invariants with zero or undefined values.
|
|
292
|
|
293 In addition to usage of abbreviations for specifying atomic
|
|
294 invariants, the following descriptive words are also allowed:
|
|
295
|
|
296 X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors
|
|
297 BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms
|
|
298 LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms
|
|
299 SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms
|
|
300 DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms
|
|
301 TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms
|
|
302 H : NumOfImplicitAndExplicitHydrogens
|
|
303 Ar : Aromatic
|
|
304 RA : RingAtom
|
|
305 FC : FormalCharge
|
|
306 MN : MassNumber
|
|
307 SM : SpinMultiplicity
|
|
308
|
|
309 Examples:
|
|
310
|
|
311 Benzene: Using value of *AS* for --AtomicInvariantsToUse, *Yes* for
|
|
312 UseBondSymbols, and *AllAtomPathsWithRings* for -p, --PathMode,
|
|
313 atom path strings generated are:
|
|
314
|
|
315 C C:C C:C:C C:C:C:C C:C:C:C:C C:C:C:C:C:C C:C:C:C:C:C:C
|
|
316
|
|
317 And using *AS,X,BO* for --AtomicInvariantsToUse generates following
|
|
318 atom path strings:
|
|
319
|
|
320 C.X2.BO3 C.X2.BO3:C.X2.BO3 C.X2.BO3:C.X2.BO3:C.X2.BO3
|
|
321 C.X2.BO3:C.X2.BO3:C.X2.BO3:C.X2.BO3
|
|
322 C.X2.BO3:C.X2.BO3:C.X2.BO3:C.X2.BO3:C.X2.BO3
|
|
323 C.X2.BO3:C.X2.BO3:C.X2.BO3:C.X2.BO3:C.X2.BO3:C.X2.BO3
|
|
324 C.X2.BO3:C.X2.BO3:C.X2.BO3:C.X2.BO3:C.X2.BO3:C.X2.BO3:C.X2.BO3
|
|
325
|
|
326 Urea: Using value of *AS* for --AtomicInvariantsToUse, *Yes* for
|
|
327 UseBondSymbols, and *AllAtomPathsWithRings* for -p, --PathMode,
|
|
328 atom path strings are:
|
|
329
|
|
330 C N O C=O CN NC=O NCN
|
|
331
|
|
332 And using *AS,X,BO* for --AtomicInvariantsToUse generates following
|
|
333 atom path strings:
|
|
334
|
|
335 C.X3.BO4 N.X1.BO1 O.X1.BO2 C.X3.BO4=O.X1.BO2
|
|
336 C.X3.BO4N.X1.BO1 N.X1.BO1C.X3.BO4=O.X1.BO2
|
|
337 N.X1.BO1C.X3.BO4N.X1.BO1
|
|
338
|
|
339 --FunctionalClassesToUse *"FunctionalClass1,FunctionalClass2..."*
|
|
340 This value is used during *FunctionalClassAtomTypes* value of -a,
|
|
341 --AtomIdentifierType option. It's a list of comma separated valid
|
|
342 functional classes.
|
|
343
|
|
344 Possible values for atom functional classes are: *Ar, CA, H, HBA,
|
|
345 HBD, Hal, NI, PI, RA*. Default value [ Ref 24 ]:
|
|
346 *HBD,HBA,PI,NI,Ar,Hal*.
|
|
347
|
|
348 The functional class abbreviations correspond to:
|
|
349
|
|
350 HBD: HydrogenBondDonor
|
|
351 HBA: HydrogenBondAcceptor
|
|
352 PI : PositivelyIonizable
|
|
353 NI : NegativelyIonizable
|
|
354 Ar : Aromatic
|
|
355 Hal : Halogen
|
|
356 H : Hydrophobic
|
|
357 RA : RingAtom
|
|
358 CA : ChainAtom
|
|
359
|
|
360 Functional class atom type specification for an atom corresponds to:
|
|
361
|
|
362 Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
|
|
363
|
|
364 *AtomTypes::FunctionalClassAtomTypes* module is used to assign
|
|
365 functional class atom types. It uses following definitions [ Ref
|
|
366 60-61, Ref 65-66 ]:
|
|
367
|
|
368 HydrogenBondDonor: NH, NH2, OH
|
|
369 HydrogenBondAcceptor: N[!H], O
|
|
370 PositivelyIonizable: +, NH2
|
|
371 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
|
|
372
|
|
373 --BitsOrder *Ascending | Descending*
|
|
374 Bits order to use during generation of fingerprints bit-vector
|
|
375 string for *PathLengthBits* value of -m, --mode option. Possible
|
|
376 values: *Ascending, Descending*. Default: *Ascending*.
|
|
377
|
|
378 *Ascending* bit order which corresponds to first bit in each byte as
|
|
379 the lowest bit as opposed to the highest bit.
|
|
380
|
|
381 Internally, bits are stored in *Ascending* order using Perl vec
|
|
382 function. Regardless of machine order, big-endian or little-endian,
|
|
383 vec function always considers first string byte as the lowest byte
|
|
384 and first bit within each byte as the lowest bit.
|
|
385
|
|
386 -b, --BitStringFormat *BinaryString | HexadecimalString*
|
|
387 Format of fingerprints bit-vector string data in output SD, FP or
|
|
388 CSV/TSV text file(s) specified by --output used during
|
|
389 *PathLengthBits* value of -m, --mode option. Possible values:
|
|
390 *BinaryString, HexadecimalString*. Default value:
|
|
391 *HexadecimalString*.
|
|
392
|
|
393 *BinaryString* corresponds to an ASCII string containing 1s and 0s.
|
|
394 *HexadecimalString* contains bit values in ASCII hexadecimal format.
|
|
395
|
|
396 Examples:
|
|
397
|
|
398 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng
|
|
399 th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110
|
|
400 0100010101011000101001011100110001000010001001101000001001001001001000
|
|
401 0010110100000111001001000001001010100100100000000011000000101001011100
|
|
402 0010000001000101010100000100111100110111011011011000000010110111001101
|
|
403 0101100011000000010001000011000010100011101100001000001000100000000...
|
|
404
|
|
405 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng
|
|
406 th1:MaxLength8;1024;HexadecimalString;Ascending;48caa1315d82d91122b029
|
|
407 42861c9409a4208182d12015509767bd0867653604481a8b1288000056090583603078
|
|
408 9cedae54e26596889ab121309800900490515224208421502120a0dd9200509723ae89
|
|
409 00024181b86c0122821d4e4880c38620dab280824b455404009f082003d52c212b4e6d
|
|
410 6ea05280140069c780290c43
|
|
411
|
|
412 --CompoundID *DataFieldName or LabelPrefixString*
|
|
413 This value is --CompoundIDMode specific and indicates how compound
|
|
414 ID is generated.
|
|
415
|
|
416 For *DataField* value of --CompoundIDMode option, it corresponds to
|
|
417 datafield label name whose value is used as compound ID; otherwise,
|
|
418 it's a prefix string used for generating compound IDs like
|
|
419 LabelPrefixString<Number>. Default value, *Cmpd*, generates compound
|
|
420 IDs which look like Cmpd<Number>.
|
|
421
|
|
422 Examples for *DataField* value of --CompoundIDMode:
|
|
423
|
|
424 MolID
|
|
425 ExtReg
|
|
426
|
|
427 Examples for *LabelPrefix* or *MolNameOrLabelPrefix* value of
|
|
428 --CompoundIDMode:
|
|
429
|
|
430 Compound
|
|
431
|
|
432 The value specified above generates compound IDs which correspond to
|
|
433 Compound<Number> instead of default value of Cmpd<Number>.
|
|
434
|
|
435 --CompoundIDLabel *text*
|
|
436 Specify compound ID column label for FP or CSV/TSV text file(s) used
|
|
437 during *CompoundID* value of --DataFieldsMode option. Default:
|
|
438 *CompoundID*.
|
|
439
|
|
440 --CompoundIDMode *DataField | MolName | LabelPrefix |
|
|
441 MolNameOrLabelPrefix*
|
|
442 Specify how to generate compound IDs and write to FP or CSV/TSV text
|
|
443 file(s) along with generated fingerprints for *FP | text | all*
|
|
444 values of --output option: use a *SDFile(s)* datafield value; use
|
|
445 molname line from *SDFile(s)*; generate a sequential ID with
|
|
446 specific prefix; use combination of both MolName and LabelPrefix
|
|
447 with usage of LabelPrefix values for empty molname lines.
|
|
448
|
|
449 Possible values: *DataField | MolName | LabelPrefix |
|
|
450 MolNameOrLabelPrefix*. Default: *LabelPrefix*.
|
|
451
|
|
452 For *MolNameOrLabelPrefix* value of --CompoundIDMode, molname line
|
|
453 in *SDFile(s)* takes precedence over sequential compound IDs
|
|
454 generated using *LabelPrefix* and only empty molname values are
|
|
455 replaced with sequential compound IDs.
|
|
456
|
|
457 This is only used for *CompoundID* value of --DataFieldsMode option.
|
|
458
|
|
459 --DataFields *"FieldLabel1,FieldLabel2,... "*
|
|
460 Comma delimited list of *SDFiles(s)* data fields to extract and
|
|
461 write to CSV/TSV text file(s) along with generated fingerprints for
|
|
462 *text | all* values of --output option.
|
|
463
|
|
464 This is only used for *Specify* value of --DataFieldsMode option.
|
|
465
|
|
466 Examples:
|
|
467
|
|
468 Extreg
|
|
469 MolID,CompoundName
|
|
470
|
|
471 -d, --DataFieldsMode *All | Common | Specify | CompoundID*
|
|
472 Specify how data fields in *SDFile(s)* are transferred to output
|
|
473 CSV/TSV text file(s) along with generated fingerprints for *text |
|
|
474 all* values of --output option: transfer all SD data field; transfer
|
|
475 SD data fields common to all compounds; extract specified data
|
|
476 fields; generate a compound ID using molname line, a compound
|
|
477 prefix, or a combination of both. Possible values: *All | Common |
|
|
478 Specify | CompoundID*. Default value: *CompoundID*.
|
|
479
|
|
480 --DetectAromaticity *Yes | No*
|
|
481 Detect aromaticity before generating fingerprints. Possible values:
|
|
482 *Yes or No*. Default value: *Yes*.
|
|
483
|
|
484 *No* --DetectAromaticity forces usage of atom and bond aromaticity
|
|
485 values from *SDFile(s)* and skips the step which detects and assigns
|
|
486 aromaticity.
|
|
487
|
|
488 *No* --DetectAromaticity value is only allowed during
|
|
489 *AtomicInvariantsAtomTypes* value of -a, --AtomIdentifierType
|
|
490 option; for all other possible -a, --AtomIdentifierType values, it
|
|
491 must be *Yes*.
|
|
492
|
|
493 -f, --Filter *Yes | No*
|
|
494 Specify whether to check and filter compound data in SDFile(s).
|
|
495 Possible values: *Yes or No*. Default value: *Yes*.
|
|
496
|
|
497 By default, compound data is checked before calculating fingerprints
|
|
498 and compounds containing atom data corresponding to non-element
|
|
499 symbols or no atom data are ignored.
|
|
500
|
|
501 --FingerprintsLabel *text*
|
|
502 SD data label or text file column label to use for fingerprints
|
|
503 string in output SD or CSV/TSV text file(s) specified by --output.
|
|
504 Default value: *PathLengthFingerprints*.
|
|
505
|
|
506 --fold *Yes | No*
|
|
507 Fold fingerprints to increase bit density during *PathLengthBits*
|
|
508 value of -m, --mode option. Possible values: *Yes or No*. Default
|
|
509 value: *No*.
|
|
510
|
|
511 --FoldedSize *number*
|
|
512 Size of folded fingerprint during *PathLengthBits* value of -m,
|
|
513 --mode option. Default value: *256*. Valid values correspond to any
|
|
514 positive integer which is less than -s, --size and meets the
|
|
515 criteria for its value.
|
|
516
|
|
517 Examples:
|
|
518
|
|
519 128
|
|
520 512
|
|
521
|
|
522 -h, --help
|
|
523 Print this help message
|
|
524
|
|
525 -i, --IgnoreHydrogens *Yes | No*
|
|
526 Ignore hydrogens during fingerprints generation. Possible values:
|
|
527 *Yes or No*. Default value: *Yes*.
|
|
528
|
|
529 For *No* value of -i, --IgnoreHydrogens, any explicit hydrogens are
|
|
530 also used for generation of atoms path lengths and fingerprints;
|
|
531 implicit hydrogens are still ignored.
|
|
532
|
|
533 -k, --KeepLargestComponent *Yes | No*
|
|
534 Generate fingerprints for only the largest component in molecule.
|
|
535 Possible values: *Yes or No*. Default value: *Yes*.
|
|
536
|
|
537 For molecules containing multiple connected components, fingerprints
|
|
538 can be generated in two different ways: use all connected components
|
|
539 or just the largest connected component. By default, all atoms
|
|
540 except for the largest connected component are deleted before
|
|
541 generation of fingerprints.
|
|
542
|
|
543 -m, --mode *PathLengthBits | PathLengthCount*
|
|
544 Specify type of path length fingerprints to generate for molecules
|
|
545 in *SDFile(s)*. Possible values: *PathLengthBits, PathLengthCount*.
|
|
546 Default value: *PathLengthBits*.
|
|
547
|
|
548 For *PathLengthBits* value of -m, --mode option, a fingerprint
|
|
549 bit-vector string containing zeros and ones is generated and for
|
|
550 *PathLengthCount* value, a fingerprint vector string corresponding
|
|
551 to number of atom paths is generated.
|
|
552
|
|
553 --MinPathLength *number*
|
|
554 Minimum atom path length to include in fingerprints. Default value:
|
|
555 *1*. Valid values: positive integers and less than --MaxPathLength.
|
|
556 Path length of 1 corresponds to a path containing only one atom.
|
|
557
|
|
558 --MaxPathLength *number*
|
|
559 Maximum atom path length to include in fingerprints. Default value:
|
|
560 *8*. Valid values: positive integers and greater than
|
|
561 --MinPathLength.
|
|
562
|
|
563 -n, --NumOfBitsToSetPerPath *number*
|
|
564 Number of bits to set per path during generation of fingerprints
|
|
565 bit-vector string for *PathLengthBits* value of -m, --mode option.
|
|
566 Default value: *1*. Valid values: positive integers.
|
|
567
|
|
568 --OutDelim *comma | tab | semicolon*
|
|
569 Delimiter for output CSV/TSV text file(s). Possible values: *comma,
|
|
570 tab, or semicolon*. Default value: *comma*.
|
|
571
|
|
572 --output *SD | FP | text | all*
|
|
573 Type of output files to generate. Possible values: *SD, FP, text, or
|
|
574 all*. Default value: *text*.
|
|
575
|
|
576 -o, --overwrite
|
|
577 Overwrite existing files.
|
|
578
|
|
579 -p, --PathMode *AtomPathsWithoutRings | AtomPathsWithRings |
|
|
580 AllAtomPathsWithoutRings | AllAtomPathsWithRings*
|
|
581 Specify type of atom paths to use for generating pathlength
|
|
582 fingerprints for molecules in *SDFile(s)*. Possible
|
|
583 values: *AtomPathsWithoutRings, AtomPathsWithRings,
|
|
584 AllAtomPathsWithoutRings, AllAtomPathsWithRings*. Default value:
|
|
585 *AllAtomPathsWithRings*.
|
|
586
|
|
587 For molecules with no rings, first two and last two options are
|
|
588 equivalent and generate same set of atom paths starting from each
|
|
589 atom with length between --MinPathLength and --MaxPathLength.
|
|
590 However, all these four options can result in the same set of final
|
|
591 atom paths for molecules containing fused, bridged or spiro rings.
|
|
592
|
|
593 For molecules containing rings, atom paths starting from each atom
|
|
594 can be traversed in four different ways:
|
|
595
|
|
596 *AtomPathsWithoutRings* - Atom paths containing no rings and without
|
|
597 sharing of bonds in traversed paths.
|
|
598
|
|
599 *AtomPathsWithRings* - Atom paths containing rings and without any
|
|
600 sharing of bonds in traversed paths.
|
|
601
|
|
602 *AllAtomPathsWithoutRings* - All possible atom paths containing no
|
|
603 rings and without any sharing of bonds in traversed paths.
|
|
604
|
|
605 *AllAtomPathsWithRings* - All possible atom paths containing rings
|
|
606 and with sharing of bonds in traversed paths.
|
|
607
|
|
608 Atom path traversal is terminated at the ring atom.
|
|
609
|
|
610 Based on values specified for -p, --PathMode, --MinPathLength
|
|
611 and --MaxPathLength, all appropriate atom paths are generated for
|
|
612 each atom in the molecule and collected in a list.
|
|
613
|
|
614 For each atom path in the filtered atom paths list, an atom path
|
|
615 string is created using value of -a, --AtomIdentifierType and
|
|
616 specified values to use for a particular atom identifier type. Value
|
|
617 of -u, --UseBondSymbols controls whether bond order symbols are used
|
|
618 during generation of atom path string. Atom symbol corresponds to
|
|
619 element symbol and characters used to represent bond order are: *1 -
|
|
620 None; 2 - '='; 3 - '#'; 1.5 or aromatic - ':'; others: bond order
|
|
621 value*. By default, bond symbols are included in atom path strings.
|
|
622 Exclusion of bond symbols in atom path strings results in
|
|
623 fingerprints which correspond purely to atom paths without
|
|
624 considering bonds.
|
|
625
|
|
626 UseUniquePaths controls the removal of structurally duplicate atom
|
|
627 path strings from the list.
|
|
628
|
|
629 For *PathLengthBits* value of -m, --mode option, each atom path is
|
|
630 hashed to a 32 bit unsigned integer key using TextUtil::HashCode
|
|
631 function. Using the hash key as a seed for a random number
|
|
632 generator, a random integer value between 0 and --Size is used to
|
|
633 set corresponding bits in the fingerprint bit-vector string. Value
|
|
634 of --NumOfBitsToSetPerPath option controls the number of times a
|
|
635 random number is generated to set corresponding bits.
|
|
636
|
|
637 For *PathLengthCount* value of -m, --mode option, the number of
|
|
638 times an atom path appears is tracked and a fingerprints
|
|
639 count-string corresponding to count of atom paths is generated.
|
|
640
|
|
641 For molecules containing rings, combination of -p, --PathMode and
|
|
642 --UseBondSymbols allows generation of up to 8 different types of
|
|
643 atom path length strings:
|
|
644
|
|
645 AllowSharedBonds AllowRings UseBondSymbols
|
|
646
|
|
647 0 0 1 - AtomPathsNoCyclesWithBondSymbols
|
|
648 0 1 1 - AtomPathsWithCyclesWithBondSymbols
|
|
649
|
|
650 1 0 1 - AllAtomPathsNoCyclesWithBondSymbols
|
|
651 1 1 1 - AllAtomPathsWithCyclesWithBondSymbols
|
|
652 [ DEFAULT ]
|
|
653
|
|
654 0 0 0 - AtomPathsNoCyclesNoBondSymbols
|
|
655 0 1 0 - AtomPathsWithCyclesNoBondSymbols
|
|
656
|
|
657 1 0 0 - AllAtomPathsNoCyclesNoBondSymbols
|
|
658 1 1 0 - AllAtomPathsWithCyclesNoBondSymbols
|
|
659
|
|
660 Default atom path length fingerprints generation for molecules
|
|
661 containing rings with *AllAtomPathsWithRings* value for -p,
|
|
662 --PathMode, *Yes* value for --UseBondSymbols, *2* value for
|
|
663 --MinPathLength and *8* value for --MaxPathLength is the most time
|
|
664 consuming. Combinations of other options can substantially speed up
|
|
665 fingerprint generation for molecules containing complex ring
|
|
666 systems.
|
|
667
|
|
668 Additionally, value for option -a, --AtomIdentifierType in
|
|
669 conjunction with corresponding specified values for atom types
|
|
670 changes the nature of atom path length strings and the fingerprints.
|
|
671
|
|
672 -q, --quote *Yes | No*
|
|
673 Put quote around column values in output CSV/TSV text file(s).
|
|
674 Possible values: *Yes or No*. Default value: *Yes*.
|
|
675
|
|
676 -r, --root *RootName*
|
|
677 New file name is generated using the root: <Root>.<Ext>. Default for
|
|
678 new file names: <SDFileName><PathLengthFP>.<Ext>. The file type
|
|
679 determines <Ext> value. The sdf, fpf, csv, and tsv <Ext> values are
|
|
680 used for SD, FP, comma/semicolon, and tab delimited text files,
|
|
681 respectively. This option is ignored for multiple input files.
|
|
682
|
|
683 -s, --size *number*
|
|
684 Size of fingerprints. Default value: *1024*. Valid values correspond
|
|
685 to any positive integer which satisfies the following criteria:
|
|
686 power of 2, >= 32 and <= 2 ** 32.
|
|
687
|
|
688 Examples:
|
|
689
|
|
690 256
|
|
691 512
|
|
692 2048
|
|
693
|
|
694 -u, --UseBondSymbols *Yes | No*
|
|
695 Specify whether to use bond symbols for atom paths during generation
|
|
696 of atom path strings. Possible values: *Yes or No*. Default value:
|
|
697 *Yes*.
|
|
698
|
|
699 *No* value option for -u, --UseBondSymbols allows the generation of
|
|
700 fingerprints corresponding purely to atoms disregarding all bonds.
|
|
701
|
|
702 --UsePerlCoreRandom *Yes | No*
|
|
703 Specify whether to use Perl CORE::rand or MayaChemTools
|
|
704 MathUtil::random function during random number generation for
|
|
705 setting bits in fingerprints bit-vector strings. Possible values:
|
|
706 *Yes or No*. Default value: *Yes*.
|
|
707
|
|
708 *No* value option for --UsePerlCoreRandom allows the generation of
|
|
709 fingerprints bit-vector strings which are same across different
|
|
710 platforms.
|
|
711
|
|
712 The random number generator implemented in MayaChemTools is a
|
|
713 variant of linear congruential generator (LCG) as described by
|
|
714 Miller et al. [ Ref 120 ]. It is also referred to as Lehmer random
|
|
715 number generator or Park-Miller random number generator.
|
|
716
|
|
717 Unlike Perl's core random number generator function rand, the random
|
|
718 number generator implemented in MayaChemTools, MathUtil::random,
|
|
719 generates consistent random values across different platforms for a
|
|
720 specific random seed and leads to generation of portable
|
|
721 fingerprints bit-vector strings.
|
|
722
|
|
723 --UseUniquePaths *Yes | No*
|
|
724 Specify whether to use structurally unique atom paths during
|
|
725 generation of atom path strings. Possible values: *Yes or No*.
|
|
726 Default value: *Yes*.
|
|
727
|
|
728 *No* value option for --UseUniquePaths allows usage of all atom
|
|
729 paths generated by -p, --PathMode option value for generation of
|
|
730 atom path strings leading to duplicate path count during
|
|
731 *PathLengthCount* value of -m, --mode option. It doesn't change
|
|
732 fingerprint string generated during *PathLengthBits* value of -m,
|
|
733 --mode.
|
|
734
|
|
735 For example, during *AllAtomPathsWithRings* value of -p, --PathMode
|
|
736 option, benzene has 12 linear paths of length 2 and 12 cyclic paths
|
|
737 of length 7, but only 6 linear paths of length 2 and 1 cyclic path
|
|
738 of length 7 are structurally unique.
|
|
739
|
|
740 -v, --VectorStringFormat *IDsAndValuesString | IDsAndValuesPairsString |
|
|
741 ValuesAndIDsString | ValuesAndIDsPairsString*
|
|
742 Format of fingerprints vector string data in output SD, FP or
|
|
743 CSV/TSV text file(s) specified by --output used during
|
|
744 *PathLengthCount* value of -m, --mode option. Possible values:
|
|
745 *IDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString |
|
|
746 ValuesAndIDsPairsString*. Default value: *IDsAndValuesString*.
|
|
747
|
|
748 Examples:
|
|
749
|
|
750 FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength
|
|
751 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2
|
|
752 C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X
|
|
753 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1
|
|
754 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO
|
|
755 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C....
|
|
756
|
|
757 FingerprintsVector;PathLengthCount:EStateAtomTypes:MinLength1:MaxLengt
|
|
758 h8;454;NumericalValues;IDsAndValuesPairsString;aaCH 14 aasC 8 aasN 1 d
|
|
759 O 2 dssC 2 sCH3 2 sF 1 sOH 3 ssCH2 4 ssNH 1 sssCH 3 aaCH:aaCH 10 aaCH:
|
|
760 aasC 8 aasC:aasC 3 aasC:aasN 2 aasCaasC 2 aasCdssC 1 aasCsF 1 aasCssNH
|
|
761 1 aasCsssCH 1 aasNssCH2 1 dO=dssC 2 dssCsOH 1 dssCssCH2 1 dssCssNH 1
|
|
762 sCH3sssCH 2 sOHsssCH 2 ssCH2ssCH2 1 ssCH2sssCH 4 aaCH:aaCH:aaCH 6 a...
|
|
763
|
|
764 -w, --WorkingDir *DirName*
|
|
765 Location of working directory. Default: current directory.
|
|
766
|
|
767 EXAMPLES
|
|
768 To generate path length fingerprints corresponding to all unique paths
|
|
769 from length 1 through 8 in hexadecimal bit-vector string format of size
|
|
770 1024 and create a SamplePLFPHex.csv file containing sequential compound
|
|
771 IDs along with fingerprints bit-vector strings data, type:
|
|
772
|
|
773 % PathLengthFingerprints.pl -o -r SamplePLFPHex Sample.sdf
|
|
774
|
|
775 To generate path length fingerprints corresponding to all unique paths
|
|
776 from length 1 through 8 in hexadecimal bit-vector string format of size
|
|
777 1024 and create SamplePLFPHex.sdf, SamplePLFPHex.fpf, and
|
|
778 SamplePLFPHex.csv files containing sequential compound IDs in CSV file
|
|
779 along with fingerprints bit-vector strings data, type:
|
|
780
|
|
781 % PathLengthFingerprints.pl --output all -o -r SamplePLFPHex Sample.sdf
|
|
782
|
|
783 To generate path length fingerprints corresponding to all unique paths
|
|
784 from length 1 through 8 in binary bit-vector string format of size 2048
|
|
785 and create a SamplePLFPBin.csv file containing sequential compound IDs
|
|
786 along with fingerprints bit-vector strings data, type:
|
|
787
|
|
788 % PathLengthFingerprints.pl --BitStringFormat BinaryString --size 2048
|
|
789 -o -r SamplePLFPBin Sample.sdf
|
|
790
|
|
791 To generate path length fingerprints corresponding to count of all
|
|
792 unique paths from length 1 through 8 in IDsAndValuesString format and
|
|
793 create a SamplePLFPCount.csv file containing sequential compound IDs
|
|
794 along with fingerprints vector strings data, type:
|
|
795
|
|
796 % PathLengthFingerprints.pl -m PathLengthCount -o -r SamplePLFPCount
|
|
797 Sample.sdf
|
|
798
|
|
799 To generate path length fingerprints corresponding to count of all
|
|
800 unique paths from length 1 through 8 in IDsAndValuesString format using
|
|
801 E-state atom types and create a SamplePLFPCount.csv file containing
|
|
802 sequential compound IDs along with fingerprints vector strings data,
|
|
803 type:
|
|
804
|
|
805 % PathLengthFingerprints.pl -m PathLengthCount --AtomIdentifierType
|
|
806 EStateAtomTypes -o -r SamplePLFPCount Sample.sdf
|
|
807
|
|
808 To generate path length fingerprints corresponding to count of all
|
|
809 unique paths from length 1 through 8 in IDsAndValuesString format using
|
|
810 SLogP atom types and create a SamplePLFPCount.csv file containing
|
|
811 sequential compound IDs along with fingerprints vector strings data,
|
|
812 type:
|
|
813
|
|
814 % PathLengthFingerprints.pl -m PathLengthCount --AtomIdentifierType
|
|
815 SLogPAtomTypes -o -r SamplePLFPCount Sample.sdf
|
|
816
|
|
817 To generate path length fingerprints corresponding to count of all
|
|
818 unique paths from length 1 through 8 in ValuesAndIDsPairsString format and
|
|
819 create a SamplePLFPCount.csv file containing sequential compound IDs
|
|
820 along with fingerprints vector strings data, type:
|
|
821
|
|
822 % PathLengthFingerprints.pl -m PathLengthCount --VectorStringFormat
|
|
823 ValuesAndIDsPairsString -o -r SamplePLFPCount Sample.sdf
|
|
824
|
|
825 To generate path length fingerprints corresponding to count of all
|
|
826 unique paths from length 1 through 8 in IDsAndValuesString format using
|
|
827 AS,X,BO as atomic invariants and create a SamplePLFPCount.csv file
|
|
828 containing sequential compound IDs along with fingerprints vector
|
|
829 strings data, type:
|
|
830
|
|
831 % PathLengthFingerprints.pl -m PathLengthCount --AtomIdentifierType
|
|
832 AtomicInvariantsAtomTypes --AtomicInvariantsToUse "AS,X,BO" -o
|
|
833 -r SamplePLFPCount Sample.sdf
|
|
834
|
|
835 To generate path length fingerprints corresponding to count of all paths
|
|
836 from length 1 through 8 in IDsAndValuesString format and create a
|
|
837 SamplePLFPCount.csv file containing compound IDs from MolName line along
|
|
838 with fingerprints vector strings data, type:
|
|
839
|
|
840 % PathLengthFingerprints.pl -m PathLengthCount --UseUniquePaths No
|
|
841 -o --CompoundIDMode MolName -r SamplePLFPCount
|
|
842 Sample.sdf
|
|
843
|
|
844 To generate path length fingerprints corresponding to all unique paths
|
|
845 from length 1 through 8 in hexadecimal bit-vector string format of size
|
|
846 512 after folding and create SamplePLFPHex.sdf, SamplePLFPHex.fpf, and
|
|
847 SamplePLFPHex.csv files containing sequential compound IDs along with
|
|
848 fingerprints bit-vector strings data, type:
|
|
849
|
|
850 % PathLengthFingerprints.pl --output all --Fold Yes --FoldedSize 512
|
|
851 -o -r SamplePLFPHex Sample.sdf
|
|
852
|
|
853 To generate path length fingerprints corresponding to all unique paths
|
|
854 from length 1 through 8 containing no rings and without sharing of bonds
|
|
855 in hexadecimal bit-vector string format of size 1024 and create a
|
|
856 SamplePLFPHex.csv file containing sequential compound IDs along with
|
|
857 fingerprints bit-vector strings data and all data fields, type:
|
|
858
|
|
859 % PathLengthFingerprints.pl -p AtomPathsWithoutRings --DataFieldsMode All
|
|
860 -o -r SamplePLFPHex Sample.sdf
|
|
861
|
|
862 To generate path length fingerprints corresponding to all unique paths
|
|
863 from length 1 through 8 containing rings and without sharing of bonds in
|
|
864 hexadecimal bit-vector string format of size 1024 and create a
|
|
865 SamplePLFPHex.tsv file containing compound IDs derived from combination
|
|
866 of molecule name line and an explicit compound prefix along with
|
|
867 fingerprints bit-vector strings data and all data fields, type:
|
|
868
|
|
869 % PathLengthFingerprints.pl -p AtomPathsWithRings --DataFieldsMode
|
|
870 CompoundID --CompoundIDMode MolNameOrLabelPrefix --CompoundID Cmpd
|
|
871 --CompoundIDLabel MolID --FingerprintsLabel PathLengthFP --OutDelim Tab
|
|
872 -r SamplePLFPHex -o Sample.sdf
|
|
873
|
|
874 To generate path length fingerprints corresponding to count of all
|
|
875 unique paths from length 1 through 8 in IDsAndValuesString format and
|
|
876 create a SamplePLFPCount.csv file containing sequential compound IDs
|
|
877 along with fingerprints vector strings data using aromaticity specified
|
|
878 in SD file, type:
|
|
879
|
|
880 % PathLengthFingerprints.pl -m PathLengthCount --DetectAromaticity No
|
|
881 -o -r SamplePLFPCount Sample.sdf
|
|
882
|
|
883 To generate path length fingerprints corresponding to all unique paths
|
|
884 from length 2 through 6 in hexadecimal bit-vector string format of size
|
|
885 1024 and create a SamplePLFPHex.csv file containing sequential compound
|
|
886 IDs along with fingerprints bit-vector strings data, type:
|
|
887
|
|
888 % PathLengthFingerprints.pl --MinPathLength 2 --MaxPathLength 6
|
|
889 -o -r SamplePLFPHex Sample.sdf
|
|
890
|
|
891 AUTHOR
|
|
892 Manish Sud <msud@san.rr.com>
|
|
893
|
|
894 SEE ALSO
|
|
895 InfoFingerprintsFiles.pl, SimilarityMatricesFingerprints.pl,
|
|
896 AtomNeighborhoodsFingerprints.pl, ExtendedConnectivityFingerprints.pl,
|
|
897 MACCSKeysFingerprints.pl, TopologicalAtomPairsFingerprints.pl,
|
|
898 TopologicalAtomTorsionsFingerprints.pl,
|
|
899 TopologicalPharmacophoreAtomPairsFingerprints.pl,
|
|
900 TopologicalPharmacophoreAtomTripletsFingerprints.pl
|
|
901
|
|
902 COPYRIGHT
|
|
903 Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
904
|
|
905 This file is part of MayaChemTools.
|
|
906
|
|
907 MayaChemTools is free software; you can redistribute it and/or modify it
|
|
908 under the terms of the GNU Lesser General Public License as published by
|
|
909 the Free Software Foundation; either version 3 of the License, or (at
|
|
910 your option) any later version.
|
|
911
|