comparison docs/scripts/man1/PathLengthFingerprints.1 @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 .\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.22)
2 .\"
3 .\" Standard preamble:
4 .\" ========================================================================
5 .de Sp \" Vertical space (when we can't use .PP)
6 .if t .sp .5v
7 .if n .sp
8 ..
9 .de Vb \" Begin verbatim text
10 .ft CW
11 .nf
12 .ne \\$1
13 ..
14 .de Ve \" End verbatim text
15 .ft R
16 .fi
17 ..
18 .\" Set up some character translations and predefined strings. \*(-- will
19 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
20 .\" double quote, and \*(R" will give a right double quote. \*(C+ will
21 .\" give a nicer C++. Capital omega is used to do unbreakable dashes and
22 .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
23 .\" nothing in troff, for use with C<>.
24 .tr \(*W-
25 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
26 .ie n \{\
27 . ds -- \(*W-
28 . ds PI pi
29 . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
30 . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
31 . ds L" ""
32 . ds R" ""
33 . ds C` ""
34 . ds C' ""
35 'br\}
36 .el\{\
37 . ds -- \|\(em\|
38 . ds PI \(*p
39 . ds L" ``
40 . ds R" ''
41 'br\}
42 .\"
43 .\" Escape single quotes in literal strings from groff's Unicode transform.
44 .ie \n(.g .ds Aq \(aq
45 .el .ds Aq '
46 .\"
47 .\" If the F register is turned on, we'll generate index entries on stderr for
48 .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
49 .\" entries marked with X<> in POD. Of course, you'll have to process the
50 .\" output yourself in some meaningful fashion.
51 .ie \nF \{\
52 . de IX
53 . tm Index:\\$1\t\\n%\t"\\$2"
54 ..
55 . nr % 0
56 . rr F
57 .\}
58 .el \{\
59 . de IX
60 ..
61 .\}
62 .\"
63 .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
64 .\" Fear. Run. Save yourself. No user-serviceable parts.
65 . \" fudge factors for nroff and troff
66 .if n \{\
67 . ds #H 0
68 . ds #V .8m
69 . ds #F .3m
70 . ds #[ \f1
71 . ds #] \fP
72 .\}
73 .if t \{\
74 . ds #H ((1u-(\\\\n(.fu%2u))*.13m)
75 . ds #V .6m
76 . ds #F 0
77 . ds #[ \&
78 . ds #] \&
79 .\}
80 . \" simple accents for nroff and troff
81 .if n \{\
82 . ds ' \&
83 . ds ` \&
84 . ds ^ \&
85 . ds , \&
86 . ds ~ ~
87 . ds /
88 .\}
89 .if t \{\
90 . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
91 . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
92 . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
93 . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
94 . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
95 . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
96 .\}
97 . \" troff and (daisy-wheel) nroff accents
98 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
99 .ds 8 \h'\*(#H'\(*b\h'-\*(#H'
100 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
101 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
102 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
103 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
104 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
105 .ds ae a\h'-(\w'a'u*4/10)'e
106 .ds Ae A\h'-(\w'A'u*4/10)'E
107 . \" corrections for vroff
108 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
109 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
110 . \" for low resolution devices (crt and lpr)
111 .if \n(.H>23 .if \n(.V>19 \
112 \{\
113 . ds : e
114 . ds 8 ss
115 . ds o a
116 . ds d- d\h'-1'\(ga
117 . ds D- D\h'-1'\(hy
118 . ds th \o'bp'
119 . ds Th \o'LP'
120 . ds ae ae
121 . ds Ae AE
122 .\}
123 .rm #[ #] #H #V #F C
124 .\" ========================================================================
125 .\"
126 .IX Title "PATHLENGTHFINGERPRINTS 1"
127 .TH PATHLENGTHFINGERPRINTS 1 "2015-03-29" "perl v5.14.2" "MayaChemTools"
128 .\" For nroff, turn off justification. Always turn off hyphenation; it makes
129 .\" way too many mistakes in technical documents.
130 .if n .ad l
131 .nh
132 .SH "NAME"
133 PathLengthFingerprints.pl \- Generate atom path length based fingerprints for SD files
134 .SH "SYNOPSIS"
135 .IX Header "SYNOPSIS"
136 PathLengthFingerprints.pl SDFile(s)...
137 .PP
138 PathLengthFingerprints.pl [\fB\-\-AromaticityModel\fR \fIAromaticityModelType\fR]
139 [\fB\-a, \-\-AtomIdentifierType\fR \fIAtomicInvariantsAtomTypes\fR]
140 [\fB\-\-AtomicInvariantsToUse\fR \fI\*(L"AtomicInvariant1,AtomicInvariant2...\*(R"\fR]
141 [\fB\-\-FunctionalClassesToUse\fR \fI\*(L"FunctionalClass1,FunctionalClass2...\*(R"\fR]
142 [\fB\-\-BitsOrder\fR \fIAscending | Descending\fR] [\fB\-b, \-\-BitStringFormat\fR \fIBinaryString | HexadecimalString\fR]
143 [\fB\-\-CompoundID\fR \fIDataFieldName or LabelPrefixString\fR] [\fB\-\-CompoundIDLabel\fR \fItext\fR]
144 [\fB\-\-CompoundIDMode\fR \fIDataField | MolName | LabelPrefix | MolNameOrLabelPrefix\fR]
145 [\fB\-\-DataFields\fR \fI\*(L"FieldLabel1,FieldLabel2,... \*(R"\fR] [\fB\-d, \-\-DataFieldsMode\fR \fIAll | Common | Specify | CompoundID\fR]
146 [\fB\-\-DetectAromaticity\fR \fIYes | No\fR] [\fB\-f, \-\-Filter\fR \fIYes | No\fR] [\fB\-\-FingerprintsLabel\fR \fItext\fR]
147 [\fB\-\-fold\fR \fIYes | No\fR] [\fB\-\-FoldedSize\fR \fInumber\fR] [\fB\-h, \-\-help\fR]
148 [\fB\-i, \-\-IgnoreHydrogens\fR \fIYes | No\fR] [\fB\-k, \-\-KeepLargestComponent\fR \fIYes | No\fR]
149 [\fB\-m, \-\-mode\fR \fIPathLengthBits | PathLengthCount\fR]
150 [\fB\-\-MinPathLength\fR \fInumber\fR] [\fB\-\-MaxPathLength\fR \fInumber\fR] [\fB\-n, \-\-NumOfBitsToSetPerPath\fR \fInumber\fR]
151 [\fB\-\-OutDelim\fR \fIcomma | tab | semicolon\fR]
152 [\fB\-\-output\fR \fI\s-1SD\s0 | \s-1FP\s0 | text | all\fR] [\fB\-q, \-\-quote\fR \fIYes | No\fR] [\fB\-r, \-\-root\fR \fIRootName\fR]
153 [\fB\-p, \-\-PathMode\fR \fIAtomPathsWithoutRings | AtomPathsWithRings | AllAtomPathsWithoutRings | AllAtomPathsWithRings\fR]
154 [\fB\-s, \-\-size\fR \fInumber\fR] [\fB\-u, \-\-UseBondSymbols\fR \fIYes | No\fR] [\fB\-\-UsePerlCoreRandom\fR \fIYes | No\fR]
155 [\fB\-\-UseUniquePaths\fR \fIYes | No\fR]
156 [\fB\-v, \-\-VectorStringFormat\fR \fIIDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString\fR]
157 [\fB\-w, \-\-WorkingDir\fR dirname] SDFile(s)...
158 .SH "DESCRIPTION"
159 .IX Header "DESCRIPTION"
160 Generate atom path length fingerprints for \fISDFile(s)\fR and create appropriate \s-1SD\s0, \s-1FP\s0 or
161 \&\s-1CSV/TSV\s0 text file(s) containing fingerprints bit-vector or vector strings corresponding to
162 molecular fingerprints.
163 .PP
164 Multiple SDFile names are separated by spaces. The valid file extensions are \fI.sdf\fR
165 and \fI.sd\fR. All other file names are ignored. All the \s-1SD\s0 files in the current directory
166 can be specified either by \fI*.sdf\fR or the current directory name.
167 .PP
168 The current release of MayaChemTools supports generation of path length fingerprints
169 corresponding to the following \fB\-a, \-\-AtomIdentifierType\fR values:
170 .PP
171 .Vb 3
172 \& AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
173 \& FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
174 \& SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
175 .Ve
176 .PP
177 Based on the values specified for \fB\-p, \-\-PathMode\fR, \fB\-\-MinPathLength\fR and \fB\-\-MaxPathLength\fR,
178 all appropriate atom paths are generated for each atom in the molecule and collected in a list and
179 the list is filtered to remove any structurally duplicate paths as indicated by the value of
180 \&\fB\-\-UseUniquePaths\fR option.
181 .PP
182 For each atom path in the filtered atom paths list, an atom path string is created using value of
183 \&\fB\-a, \-\-AtomIdentifierType\fR and specified values to use for a particular atom identifier type.
184 Value of \fB\-u, \-\-UseBondSymbols\fR controls whether bond order symbols are used during generation
185 of atom path string. For each atom path, only lexicographically smaller atom path strings are kept.
186 .PP
187 For \fIPathLengthBits\fR value of \fB\-m, \-\-mode\fR option, each atom path is hashed to a 32 bit unsigned
188 integer key using \fBTextUtil::HashCode\fR function. Using the hash key as a seed for a random number
189 generator, a random integer value between 0 and \fB\-s, \-\-size\fR is used to set corresponding bits
190 in the fingerprint bit-vector string. Value of \fB\-\-NumOfBitsToSetPerPath\fR option controls the number
191 of times a random number is generated to set corresponding bits.
192 .PP
193 For \fIPathLengthCount\fR value of \fB\-m, \-\-mode\fR option, the number of times an atom path appears
194 is tracked and a fingerprints count-string corresponding to count of atom paths is generated.
195 .PP
196 Example of \fI\s-1SD\s0\fR file containing path length fingerprints string data:
197 .PP
198 .Vb 10
199 \& ... ...
200 \& ... ...
201 \& $$$$
202 \& ... ...
203 \& ... ...
204 \& ... ...
205 \& 41 44 0 0 0 0 0 0 0 0999 V2000
206 \& \-3.3652 1.4499 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
207 \& ... ...
208 \& 2 3 1 0 0 0 0
209 \& ... ...
210 \& M END
211 \& > <CmpdID>
212 \& Cmpd1
213 \&
214 \& > <PathLengthFingerprints>
215 \& FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLengt
216 \& h1:MaxLength8;1024;HexadecimalString;Ascending;9c8460989ec8a49913991a66
217 \& 03130b0a19e8051c89184414953800cc2151082844a201042800130860308e8204d4028
218 \& 00831048940e44281c00060449a5000ac80c894114e006321264401600846c050164462
219 \& 08190410805000304a10205b0100e04c0038ba0fad0209c0ca8b1200012268b61c0026a
220 \& aa0660a11014a011d46
221 \&
222 \& $$$$
223 \& ... ...
224 \& ... ...
225 .Ve
226 .PP
227 Example of \fI\s-1FP\s0\fR file containing path length fingerprints string data:
228 .PP
229 .Vb 10
230 \& #
231 \& # Package = MayaChemTools 7.4
232 \& # ReleaseDate = Oct 21, 2010
233 \& #
234 \& # TimeStamp = Mon Mar 7 15:14:01 2011
235 \& #
236 \& # FingerprintsStringType = FingerprintsBitVector
237 \& #
238 \& # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:...
239 \& # Size = 1024
240 \& # BitStringFormat = HexadecimalString
241 \& # BitsOrder = Ascending
242 \& #
243 \& Cmpd1 9c8460989ec8a49913991a6603130b0a19e8051c89184414953800cc21510...
244 \& Cmpd2 000000249400840040100042011001001980410c000000001010088001120...
245 \& ... ...
246 \& ... ..
247 .Ve
248 .PP
249 Example of \s-1CSV\s0 \fIText\fR file containing path length fingerprints string data:
250 .PP
251 .Vb 7
252 \& "CompoundID","PathLengthFingerprints"
253 \& "Cmpd1","FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes
254 \& :MinLength1:MaxLength8;1024;HexadecimalString;Ascending;9c8460989ec8a4
255 \& 9913991a6603130b0a19e8051c89184414953800cc2151082844a20104280013086030
256 \& 8e8204d402800831048940e44281c00060449a5000ac80c894114e006321264401..."
257 \& ... ...
258 \& ... ...
259 .Ve
260 .PP
261 The current release of MayaChemTools generates the following types of path length
262 fingerprints bit-vector and vector strings:
263 .PP
264 .Vb 6
265 \& FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng
266 \& th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110
267 \& 0100010101011000101001011100110001000010001001101000001001001001001000
268 \& 0010110100000111001001000001001010100100100000000011000000101001011100
269 \& 0010000001000101010100000100111100110111011011011000000010110111001101
270 \& 0101100011000000010001000011000010100011101100001000001000100000000...
271 \&
272 \& FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng
273 \& th1:MaxLength8;1024;HexadecimalString;Ascending;48caa1315d82d91122b029
274 \& 42861c9409a4208182d12015509767bd0867653604481a8b1288000056090583603078
275 \& 9cedae54e26596889ab121309800900490515224208421502120a0dd9200509723ae89
276 \& 00024181b86c0122821d4e4880c38620dab280824b455404009f082003d52c212b4e6d
277 \& 6ea05280140069c780290c43
278 \&
279 \& FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength
280 \& 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2
281 \& C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X
282 \& 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1
283 \& 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO
284 \& 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C....
285 \&
286 \& FingerprintsVector;PathLengthCount:DREIDINGAtomTypes:MinLength1:MaxLen
287 \& gth8;410;NumericalValues;IDsAndValuesPairsString;C_2 2 C_3 9 C_R 22 F_
288 \& 1 N_3 1 N_R 1 O_2 2 O_3 3 C_2=O_2 2 C_2C_3 1 C_2C_R 1 C_2N_3 1 C_2O_3
289 \& 1 C_3C_3 7 C_3C_R 1 C_3N_R 1 C_3O_3 2 C_R:C_R 21 C_R:N_R 2 C_RC_R 2 C
290 \& _RF_ 1 C_RN_3 1 C_2C_3C_3 1 C_2C_R:C_R 2 C_2N_3C_R 1 C_3C_2=O_2 1 C_3C
291 \& _2O_3 1 C_3C_3C_3 5 C_3C_3C_R 2 C_3C_3N_R 1 C_3C_3O_3 4 C_3C_R:C_R ...
292 \&
293 \& FingerprintsVector;PathLengthCount:EStateAtomTypes:MinLength1:MaxLengt
294 \& h8;454;NumericalValues;IDsAndValuesPairsString;aaCH 14 aasC 8 aasN 1 d
295 \& O 2 dssC 2 sCH3 2 sF 1 sOH 3 ssCH2 4 ssNH 1 sssCH 3 aaCH:aaCH 10 aaCH:
296 \& aasC 8 aasC:aasC 3 aasC:aasN 2 aasCaasC 2 aasCdssC 1 aasCsF 1 aasCssNH
297 \& 1 aasCsssCH 1 aasNssCH2 1 dO=dssC 2 dssCsOH 1 dssCssCH2 1 dssCssNH 1
298 \& sCH3sssCH 2 sOHsssCH 2 ssCH2ssCH2 1 ssCH2sssCH 4 aaCH:aaCH:aaCH 6 a...
299 \&
300 \& FingerprintsVector;PathLengthCount:FunctionalClassAtomTypes:MinLength1
301 \& :MaxLength8;404;NumericalValues;IDsAndValuesPairsString;Ar 22 Ar.HBA 1
302 \& HBA 2 HBA.HBD 3 HBD 1 Hal 1 NI 1 None 10 Ar.HBA:Ar 2 Ar.HBANone 1 Ar:
303 \& Ar 21 ArAr 2 ArHBD 1 ArHal 1 ArNone 2 HBA.HBDNI 1 HBA.HBDNone 2 HBA=NI
304 \& 1 HBA=None 1 HBDNone 1 NINone 1 NoneNone 7 Ar.HBA:Ar:Ar 2 Ar.HBA:ArAr
305 \& 1 Ar.HBA:ArNone 1 Ar.HBANoneNone 1 Ar:Ar.HBA:Ar 1 Ar:Ar.HBANone 2 ...
306 \&
307 \& FingerprintsVector;PathLengthCount:MMFF94AtomTypes:MinLength1:MaxLengt
308 \& h8;463;NumericalValues;IDsAndValuesPairsString;C5A 2 C5B 2 C=ON 1 CB 1
309 \& 8 COO 1 CR 9 F 1 N5 1 NC=O 1 O=CN 1 O=CO 1 OC=O 1 OR 2 C5A:C5B 2 C5A:N
310 \& 5 2 C5ACB 1 C5ACR 1 C5B:C5B 1 C5BC=ON 1 C5BCB 1 C=ON=O=CN 1 C=ONNC=O 1
311 \& CB:CB 18 CBF 1 CBNC=O 1 COO=O=CO 1 COOCR 1 COOOC=O 1 CRCR 7 CRN5 1 CR
312 \& OR 2 C5A:C5B:C5B 2 C5A:C5BC=ON 1 C5A:C5BCB 1 C5A:N5:C5A 1 C5A:N5CR ...
313 \&
314 \& FingerprintsVector;PathLengthCount:SLogPAtomTypes:MinLength1:MaxLength
315 \& 8;518;NumericalValues;IDsAndValuesPairsString;C1 5 C10 1 C11 1 C14 1 C
316 \& 18 14 C20 4 C21 2 C22 1 C5 2 CS 2 F 1 N11 1 N4 1 O10 1 O2 3 O9 1 C10C1
317 \& 1 C10N11 1 C11C1 2 C11C21 1 C14:C18 2 C14F 1 C18:C18 10 C18:C20 4 C18
318 \& :C22 2 C1C5 1 C1CS 4 C20:C20 1 C20:C21 1 C20:N11 1 C20C20 2 C21:C21 1
319 \& C21:N11 1 C21C5 1 C22N4 1 C5=O10 1 C5=O9 1 C5N4 1 C5O2 1 CSO2 2 C10...
320 \&
321 \& FingerprintsVector;PathLengthCount:SYBYLAtomTypes:MinLength1:MaxLength
322 \& 8;412;NumericalValues;IDsAndValuesPairsString;C.2 2 C.3 9 C.ar 22 F 1
323 \& N.am 1 N.ar 1 O.2 1 O.3 2 O.co2 2 C.2=O.2 1 C.2=O.co2 1 C.2C.3 1 C.2C.
324 \& ar 1 C.2N.am 1 C.2O.co2 1 C.3C.3 7 C.3C.ar 1 C.3N.ar 1 C.3O.3 2 C.ar:C
325 \& .ar 21 C.ar:N.ar 2 C.arC.ar 2 C.arF 1 C.arN.am 1 C.2C.3C.3 1 C.2C.ar:C
326 \& .ar 2 C.2N.amC.ar 1 C.3C.2=O.co2 1 C.3C.2O.co2 1 C.3C.3C.3 5 C.3C.3...
327 \&
328 \& FingerprintsVector;PathLengthCount:TPSAAtomTypes:MinLength1:MaxLength8
329 \& ;331;NumericalValues;IDsAndValuesPairsString;N21 1 N7 1 None 34 O3 2 O
330 \& 4 3 N21:None 2 N21None 1 N7None 2 None:None 21 None=O3 2 NoneNone 13 N
331 \& oneO4 3 N21:None:None 2 N21:NoneNone 2 N21NoneNone 1 N7None:None 2 N7N
332 \& one=O3 1 N7NoneNone 1 None:N21:None 1 None:N21None 2 None:None:None 20
333 \& None:NoneNone 12 NoneN7None 1 NoneNone=O3 2 NoneNoneNone 8 NoneNon...
334 \&
335 \& FingerprintsVector;PathLengthCount:UFFAtomTypes:MinLength1:MaxLength8;
336 \& 410;NumericalValues;IDsAndValuesPairsString;C_2 2 C_3 9 C_R 22 F_ 1 N_
337 \& 3 1 N_R 1 O_2 2 O_3 3 C_2=O_2 2 C_2C_3 1 C_2C_R 1 C_2N_3 1 C_2O_3 1 C_
338 \& 3C_3 7 C_3C_R 1 C_3N_R 1 C_3O_3 2 C_R:C_R 21 C_R:N_R 2 C_RC_R 2 C_RF_
339 \& 1 C_RN_3 1 C_2C_3C_3 1 C_2C_R:C_R 2 C_2N_3C_R 1 C_3C_2=O_2 1 C_3C_2O_3
340 \& 1 C_3C_3C_3 5 C_3C_3C_R 2 C_3C_3N_R 1 C_3C_3O_3 4 C_3C_R:C_R 1 C_3...
341 .Ve
342 .SH "OPTIONS"
343 .IX Header "OPTIONS"
344 .IP "\fB\-\-AromaticityModel\fR \fIMDLAromaticityModel | TriposAromaticityModel | MMFFAromaticityModel | ChemAxonBasicAromaticityModel | ChemAxonGeneralAromaticityModel | DaylightAromaticityModel | MayaChemToolsAromaticityModel\fR" 4
345 .IX Item "--AromaticityModel MDLAromaticityModel | TriposAromaticityModel | MMFFAromaticityModel | ChemAxonBasicAromaticityModel | ChemAxonGeneralAromaticityModel | DaylightAromaticityModel | MayaChemToolsAromaticityModel"
346 Specify aromaticity model to use during detection of aromaticity. Possible values in the current
347 release are: \fIMDLAromaticityModel, TriposAromaticityModel, MMFFAromaticityModel,
348 ChemAxonBasicAromaticityModel, ChemAxonGeneralAromaticityModel, DaylightAromaticityModel
349 or MayaChemToolsAromaticityModel\fR. Default value: \fIMayaChemToolsAromaticityModel\fR.
350 .Sp
351 The supported aromaticity model names along with model specific control parameters
352 are defined in \fBAromaticityModelsData.csv\fR, which is distributed with the current release
353 and is available under \fBlib/data\fR directory. \fBMolecule.pm\fR module retrieves data from
354 this file during class instantiation and makes it available to method \fBDetectAromaticity\fR
355 for detecting aromaticity corresponding to a specific model.
356 .Sp
357 This option is ignored during \fINo\fR value of \fB\-\-DetectAromaticity\fR option.
358 .IP "\fB\-a, \-\-AtomIdentifierType\fR \fIAtomicInvariantsAtomTypes | DREIDINGAtomTypes | EStateAtomTypes | FunctionalClassAtomTypes | MMFF94AtomTypes | SLogPAtomTypes | SYBYLAtomTypes | TPSAAtomTypes | UFFAtomTypes\fR" 4
359 .IX Item "-a, --AtomIdentifierType AtomicInvariantsAtomTypes | DREIDINGAtomTypes | EStateAtomTypes | FunctionalClassAtomTypes | MMFF94AtomTypes | SLogPAtomTypes | SYBYLAtomTypes | TPSAAtomTypes | UFFAtomTypes"
360 Specify atom identifier type to use for assignment of atom types to hydrogen and/or
361 non-hydrogen atoms during calculation of atom types fingerprints. Possible values in the
362 current release are: \fIAtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
363 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
364 TPSAAtomTypes, UFFAtomTypes\fR. Default value: \fIAtomicInvariantsAtomTypes\fR.
365 .IP "\fB\-a, \-\-AtomIdentifierType\fR \fIAtomicInvariantsAtomTypes | DREIDINGAtomTypes | EStateAtomTypes | FunctionalClassAtomTypes | MMFF94AtomTypes | SLogPAtomTypes | SYBYLAtomTypes | TPSAAtomTypes | UFFAtomTypes\fR" 4
366 .IX Item "-a, --AtomIdentifierType AtomicInvariantsAtomTypes | DREIDINGAtomTypes | EStateAtomTypes | FunctionalClassAtomTypes | MMFF94AtomTypes | SLogPAtomTypes | SYBYLAtomTypes | TPSAAtomTypes | UFFAtomTypes"
367 Specify atom identifier type to use during generation of atom path strings
368 corresponding to path length fingerprints. Possible values in the current release are:
369 \&\fIAtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
370 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
371 TPSAAtomTypes, UFFAtomTypes\fR. Default value: \fIAtomicInvariantsAtomTypes\fR.
372 .ie n .IP "\fB\-\-AtomicInvariantsToUse\fR \fI""AtomicInvariant1,AtomicInvariant2...""\fR" 4
373 .el .IP "\fB\-\-AtomicInvariantsToUse\fR \fI``AtomicInvariant1,AtomicInvariant2...''\fR" 4
374 .IX Item "--AtomicInvariantsToUse AtomicInvariant1,AtomicInvariant2..."
375 This value is used during \fIAtomicInvariantsAtomTypes\fR value of \fB\-a, \-\-AtomIdentifierType\fR
376 option. It's a list of comma separated valid atomic invariant atom types.
377 .Sp
378 Possible values for atomic invariants are: \fI\s-1AS\s0, X, \s-1BO\s0, \s-1LBO\s0, \s-1SB\s0, \s-1DB\s0, \s-1TB\s0,
379 H, Ar, \s-1RA\s0, \s-1FC\s0, \s-1MN\s0, \s-1SM\s0\fR. Default value: \fI\s-1AS\s0\fR.
380 .Sp
381 The atomic invariants abbreviations correspond to:
382 .Sp
383 .Vb 1
384 \& AS = Atom symbol corresponding to element symbol
385 \&
386 \& X<n> = Number of non\-hydrogen atom neighbors or heavy atoms
387 \& BO<n> = Sum of bond orders to non\-hydrogen atom neighbors or heavy atoms
388 \& LBO<n> = Largest bond order of non\-hydrogen atom neighbors or heavy atoms
389 \& SB<n> = Number of single bonds to non\-hydrogen atom neighbors or heavy atoms
390 \& DB<n> = Number of double bonds to non\-hydrogen atom neighbors or heavy atoms
391 \& TB<n> = Number of triple bonds to non\-hydrogen atom neighbors or heavy atoms
392 \& H<n> = Number of implicit and explicit hydrogens for atom
393 \& Ar = Aromatic annotation indicating whether atom is aromatic
394 \& RA = Ring atom annotation indicating whether atom is in a ring
395 \& FC<+n/\-n> = Formal charge assigned to atom
396 \& MN<n> = Mass number indicating isotope other than most abundant isotope
397 \& SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or
398 \& 3 (triplet)
399 .Ve
400 .Sp
401 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
402 .Sp
403 .Vb 1
404 \& AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/\-n>.MN<n>.SM<n>
405 .Ve
406 .Sp
407 Except for \s-1AS\s0 which is a required atomic invariant in atom types, all other atomic invariants are
408 optional. Atom type specification doesn't include atomic invariants with zero or undefined values.
409 .Sp
410 In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words
411 are also allowed:
412 .Sp
413 .Vb 12
414 \& X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors
415 \& BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms
416 \& LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms
417 \& SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms
418 \& DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms
419 \& TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms
420 \& H : NumOfImplicitAndExplicitHydrogens
421 \& Ar : Aromatic
422 \& RA : RingAtom
423 \& FC : FormalCharge
424 \& MN : MassNumber
425 \& SM : SpinMultiplicity
426 .Ve
427 .Sp
428 Examples:
429 .Sp
430 \&\fBBenzene\fR: Using value of \fI\s-1AS\s0\fR for \fB\-\-AtomicInvariantsToUse\fR, \fIYes\fR for \fB\-u, \-\-UseBondSymbols\fR,
431 and \fIAllAtomPathsWithRings\fR for \fB\-p, \-\-PathMode\fR, atom path strings generated are:
432 .Sp
433 .Vb 1
434 \& C C:C C:C:C C:C:C:C C:C:C:C:C C:C:C:C:C:C C:C:C:C:C:C:C
435 .Ve
436 .Sp
437 And using \fI\s-1AS\s0,X,BO\fR for \fB\-\-AtomicInvariantsToUse\fR generates the following atom path
438 strings:
439 .Sp
440 .Vb 5
441 \& C.X2.BO3 C.X2.BO3:C.X2.BO3 C.X2.BO3:C.X2.BO3:C.X2.BO3
442 \& C.X2.BO3:C.X2.BO3:C.X2.BO3:C.X2.BO3
443 \& C.X2.BO3:C.X2.BO3:C.X2.BO3:C.X2.BO3:C.X2.BO3
444 \& C.X2.BO3:C.X2.BO3:C.X2.BO3:C.X2.BO3:C.X2.BO3:C.X2.BO3
445 \& C.X2.BO3:C.X2.BO3:C.X2.BO3:C.X2.BO3:C.X2.BO3:C.X2.BO3:C.X2.BO3
446 .Ve
447 .Sp
448 \&\fBUrea\fR: Using value of \fI\s-1AS\s0\fR for \fB\-\-AtomicInvariantsToUse\fR, \fIYes\fR for \fB\-u, \-\-UseBondSymbols\fR,
449 and \fIAllAtomPathsWithRings\fR for \fB\-p, \-\-PathMode\fR, atom path strings are:
450 .Sp
451 .Vb 1
452 \& C N O C=O CN NC=O NCN
453 .Ve
454 .Sp
455 And using \fI\s-1AS\s0,X,BO\fR for \fB\-\-AtomicInvariantsToUse\fR generates the following atom path
456 strings:
457 .Sp
458 .Vb 3
459 \& C.X3.BO4 N.X1.BO1 O.X1.BO2 C.X3.BO4=O.X1.BO2
460 \& C.X3.BO4N.X1.BO1 N.X1.BO1C.X3.BO4=O.X1.BO2
461 \& N.X1.BO1C.X3.BO4N.X1.BO1
462 .Ve
463 .ie n .IP "\fB\-\-FunctionalClassesToUse\fR \fI""FunctionalClass1,FunctionalClass2...""\fR" 4
464 .el .IP "\fB\-\-FunctionalClassesToUse\fR \fI``FunctionalClass1,FunctionalClass2...''\fR" 4
465 .IX Item "--FunctionalClassesToUse FunctionalClass1,FunctionalClass2..."
466 This value is used during \fIFunctionalClassAtomTypes\fR value of \fB\-a, \-\-AtomIdentifierType\fR
467 option. It's a list of comma separated valid functional classes.
468 .Sp
469 Possible values for atom functional classes are: \fIAr, \s-1CA\s0, H, \s-1HBA\s0, \s-1HBD\s0, Hal, \s-1NI\s0, \s-1PI\s0, \s-1RA\s0\fR.
470 Default value [ Ref 24 ]: \fI\s-1HBD\s0,HBA,PI,NI,Ar,Hal\fR.
471 .Sp
472 The functional class abbreviations correspond to:
473 .Sp
474 .Vb 9
475 \& HBD: HydrogenBondDonor
476 \& HBA: HydrogenBondAcceptor
477 \& PI : PositivelyIonizable
478 \& NI : NegativelyIonizable
479 \& Ar : Aromatic
480 \& Hal : Halogen
481 \& H : Hydrophobic
482 \& RA : RingAtom
483 \& CA : ChainAtom
484 \&
485 \& Functional class atom type specification for an atom corresponds to:
486 \&
487 \& Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
488 .Ve
489 .Sp
490 \&\fIAtomTypes::FunctionalClassAtomTypes\fR module is used to assign functional class atom
491 types. It uses following definitions [ Ref 60\-61, Ref 65\-66 ]:
492 .Sp
493 .Vb 4
494 \& HydrogenBondDonor: NH, NH2, OH
495 \& HydrogenBondAcceptor: N[!H], O
496 \& PositivelyIonizable: +, NH2
497 \& NegativelyIonizable: \-, C(=O)OH, S(=O)OH, P(=O)OH
498 .Ve
499 .IP "\fB\-\-BitsOrder\fR \fIAscending | Descending\fR" 4
500 .IX Item "--BitsOrder Ascending | Descending"
501 Bits order to use during generation of fingerprints bit-vector string for \fIPathLengthBits\fR value of
502 \&\fB\-m, \-\-mode\fR option. Possible values: \fIAscending, Descending\fR. Default: \fIAscending\fR.
503 .Sp
504 \&\fIAscending\fR bit order corresponds to first bit in each byte as the lowest bit as
505 opposed to the highest bit.
506 .Sp
507 Internally, bits are stored in \fIAscending\fR order using Perl vec function. Regardless
508 of machine order, big-endian or little-endian, vec function always considers first
509 string byte as the lowest byte and first bit within each byte as the lowest bit.
510 .IP "\fB\-b, \-\-BitStringFormat\fR \fIBinaryString | HexadecimalString\fR" 4
511 .IX Item "-b, --BitStringFormat BinaryString | HexadecimalString"
512 Format of fingerprints bit-vector string data in output \s-1SD\s0, \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) specified by
513 \&\fB\-\-output\fR used during \fIPathLengthBits\fR value of \fB\-m, \-\-mode\fR option. Possible
514 values: \fIBinaryString, HexadecimalString\fR. Default value: \fIHexadecimalString\fR.
515 .Sp
516 \&\fIBinaryString\fR corresponds to an \s-1ASCII\s0 string containing 1s and 0s. \fIHexadecimalString\fR
517 contains bit values in \s-1ASCII\s0 hexadecimal format.
518 .Sp
519 Examples:
520 .Sp
521 .Vb 6
522 \& FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng
523 \& th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110
524 \& 0100010101011000101001011100110001000010001001101000001001001001001000
525 \& 0010110100000111001001000001001010100100100000000011000000101001011100
526 \& 0010000001000101010100000100111100110111011011011000000010110111001101
527 \& 0101100011000000010001000011000010100011101100001000001000100000000...
528 \&
529 \& FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng
530 \& th1:MaxLength8;1024;HexadecimalString;Ascending;48caa1315d82d91122b029
531 \& 42861c9409a4208182d12015509767bd0867653604481a8b1288000056090583603078
532 \& 9cedae54e26596889ab121309800900490515224208421502120a0dd9200509723ae89
533 \& 00024181b86c0122821d4e4880c38620dab280824b455404009f082003d52c212b4e6d
534 \& 6ea05280140069c780290c43
535 .Ve
536 .IP "\fB\-\-CompoundID\fR \fIDataFieldName or LabelPrefixString\fR" 4
537 .IX Item "--CompoundID DataFieldName or LabelPrefixString"
538 This value is \fB\-\-CompoundIDMode\fR specific and indicates how compound \s-1ID\s0 is generated.
539 .Sp
540 For \fIDataField\fR value of \fB\-\-CompoundIDMode\fR option, it corresponds to datafield label name
541 whose value is used as compound \s-1ID\s0; otherwise, it's a prefix string used for generating compound
542 IDs like LabelPrefixString<Number>. Default value, \fICmpd\fR, generates compound IDs which
543 look like Cmpd<Number>.
544 .Sp
545 Examples for \fIDataField\fR value of \fB\-\-CompoundIDMode\fR:
546 .Sp
547 .Vb 2
548 \& MolID
549 \& ExtReg
550 .Ve
551 .Sp
552 Examples for \fILabelPrefix\fR or \fIMolNameOrLabelPrefix\fR value of \fB\-\-CompoundIDMode\fR:
553 .Sp
554 .Vb 1
555 \& Compound
556 .Ve
557 .Sp
558 The value specified above generates compound IDs which correspond to Compound<Number>
559 instead of default value of Cmpd<Number>.
560 .IP "\fB\-\-CompoundIDLabel\fR \fItext\fR" 4
561 .IX Item "--CompoundIDLabel text"
562 Specify compound \s-1ID\s0 column label for \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) used during \fICompoundID\fR value
563 of \fB\-\-DataFieldsMode\fR option. Default: \fICompoundID\fR.
564 .IP "\fB\-\-CompoundIDMode\fR \fIDataField | MolName | LabelPrefix | MolNameOrLabelPrefix\fR" 4
565 .IX Item "--CompoundIDMode DataField | MolName | LabelPrefix | MolNameOrLabelPrefix"
566 Specify how to generate compound IDs and write to \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) along with generated
567 fingerprints for \fI\s-1FP\s0 | text | all\fR values of \fB\-\-output\fR option: use a \fISDFile(s)\fR datafield value;
568 use molname line from \fISDFile(s)\fR; generate a sequential \s-1ID\s0 with specific prefix; use combination
569 of both MolName and LabelPrefix with usage of LabelPrefix values for empty molname lines.
570 .Sp
571 Possible values: \fIDataField | MolName | LabelPrefix | MolNameOrLabelPrefix\fR.
572 Default: \fILabelPrefix\fR.
573 .Sp
574 For \fIMolNameOrLabelPrefix\fR value of \fB\-\-CompoundIDMode\fR, molname line in \fISDFile(s)\fR takes
575 precedence over sequential compound IDs generated using \fILabelPrefix\fR and only empty molname
576 values are replaced with sequential compound IDs.
577 .Sp
578 This is only used for \fICompoundID\fR value of \fB\-\-DataFieldsMode\fR option.
579 .ie n .IP "\fB\-\-DataFields\fR \fI""FieldLabel1,FieldLabel2,... ""\fR" 4
580 .el .IP "\fB\-\-DataFields\fR \fI``FieldLabel1,FieldLabel2,... ''\fR" 4
581 .IX Item "--DataFields FieldLabel1,FieldLabel2,... "
582 Comma delimited list of \fISDFile(s)\fR data fields to extract and write to \s-1CSV/TSV\s0 text file(s) along
583 with generated fingerprints for \fItext | all\fR values of \fB\-\-output\fR option.
584 .Sp
585 This is only used for \fISpecify\fR value of \fB\-\-DataFieldsMode\fR option.
586 .Sp
587 Examples:
588 .Sp
589 .Vb 2
590 \& Extreg
591 \& MolID,CompoundName
592 .Ve
593 .IP "\fB\-d, \-\-DataFieldsMode\fR \fIAll | Common | Specify | CompoundID\fR" 4
594 .IX Item "-d, --DataFieldsMode All | Common | Specify | CompoundID"
595 Specify how data fields in \fISDFile(s)\fR are transferred to output \s-1CSV/TSV\s0 text file(s) along
596 with generated fingerprints for \fItext | all\fR values of \fB\-\-output\fR option: transfer all \s-1SD\s0
597 data fields; transfer \s-1SD\s0 data fields common to all compounds; extract specified data fields;
598 generate a compound \s-1ID\s0 using molname line, a compound prefix, or a combination of both.
599 Possible values: \fIAll | Common | Specify | CompoundID\fR. Default value: \fICompoundID\fR.
600 .IP "\fB\-\-DetectAromaticity\fR \fIYes | No\fR" 4
601 .IX Item "--DetectAromaticity Yes | No"
602 Detect aromaticity before generating fingerprints. Possible values: \fIYes or No\fR.
603 Default value: \fIYes\fR.
604 .Sp
605 \&\fINo\fR \fB\-\-DetectAromaticity\fR forces usage of atom and bond aromaticity values
606 from \fISDFile(s)\fR and skips the step which detects and assigns aromaticity.
607 .Sp
608 \&\fINo\fR \fB\-\-DetectAromaticity\fR value is only allowed during \fIAtomicInvariantsAtomTypes\fR
609 value of \fB\-a, \-\-AtomIdentifierType\fR option; for all other possible \fB\-a, \-\-AtomIdentifierType\fR
610 values, it must be \fIYes\fR.
611 .IP "\fB\-f, \-\-Filter\fR \fIYes | No\fR" 4
612 .IX Item "-f, --Filter Yes | No"
613 Specify whether to check and filter compound data in SDFile(s). Possible values: \fIYes or No\fR.
614 Default value: \fIYes\fR.
615 .Sp
616 By default, compound data is checked before calculating fingerprints and compounds containing
617 atom data corresponding to non-element symbols or no atom data are ignored.
618 .IP "\fB\-\-FingerprintsLabel\fR \fItext\fR" 4
619 .IX Item "--FingerprintsLabel text"
620 \&\s-1SD\s0 data label or text file column label to use for fingerprints string in output \s-1SD\s0 or
621 \&\s-1CSV/TSV\s0 text file(s) specified by \fB\-\-output\fR. Default value: \fIPathLengthFingerprints\fR.
622 .IP "\fB\-\-fold\fR \fIYes | No\fR" 4
623 .IX Item "--fold Yes | No"
624 Fold fingerprints to increase bit density during \fIPathLengthBits\fR value of
625 \&\fB\-m, \-\-mode\fR option. Possible values: \fIYes or No\fR. Default value: \fINo\fR.
626 .IP "\fB\-\-FoldedSize\fR \fInumber\fR" 4
627 .IX Item "--FoldedSize number"
628 Size of folded fingerprint during \fIPathLengthBits\fR value of \fB\-m, \-\-mode\fR option. Default
629 value: \fI256\fR. Valid values correspond to any positive integer which is less than
630 \&\fB\-s, \-\-size\fR and meets the criteria for its value.
631 .Sp
632 Examples:
633 .Sp
634 .Vb 2
635 \& 128
636 \& 512
637 .Ve
638 .IP "\fB\-h, \-\-help\fR" 4
639 .IX Item "-h, --help"
640 Print this help message
641 .IP "\fB\-i, \-\-IgnoreHydrogens\fR \fIYes | No\fR" 4
642 .IX Item "-i, --IgnoreHydrogens Yes | No"
643 Ignore hydrogens during fingerprints generation. Possible values: \fIYes or No\fR.
644 Default value: \fIYes\fR.
645 .Sp
646 For \fINo\fR value of \fB\-i, \-\-IgnoreHydrogens\fR, any explicit hydrogens are also used for
647 generation of atoms path lengths and fingerprints; implicit hydrogens are still ignored.
648 .IP "\fB\-k, \-\-KeepLargestComponent\fR \fIYes | No\fR" 4
649 .IX Item "-k, --KeepLargestComponent Yes | No"
650 Generate fingerprints for only the largest component in molecule. Possible values:
651 \&\fIYes or No\fR. Default value: \fIYes\fR.
652 .Sp
653 For molecules containing multiple connected components, fingerprints can be generated
654 in two different ways: use all connected components or just the largest connected
655 component. By default, all atoms except for the largest connected component are
656 deleted before generation of fingerprints.
657 .IP "\fB\-m, \-\-mode\fR \fIPathLengthBits | PathLengthCount\fR" 4
658 .IX Item "-m, --mode PathLengthBits | PathLengthCount"
659 Specify type of path length fingerprints to generate for molecules in \fISDFile(s)\fR. Possible
660 values: \fIPathLengthBits, PathLengthCount\fR. Default value: \fIPathLengthBits\fR.
661 .Sp
662 For \fIPathLengthBits\fR value of \fB\-m, \-\-mode\fR option, a fingerprint bit-vector string containing
663 zeros and ones is generated and for \fIPathLengthCount\fR value, a fingerprint vector string
664 corresponding to number of atom paths is generated.
665 .IP "\fB\-\-MinPathLength\fR \fInumber\fR" 4
666 .IX Item "--MinPathLength number"
667 Minimum atom path length to include in fingerprints. Default value: \fI1\fR. Valid values:
668 positive integers and less than \fB\-\-MaxPathLength\fR. Path length of 1 corresponds to
669 a path containing only one atom.
670 .IP "\fB\-\-MaxPathLength\fR \fInumber\fR" 4
671 .IX Item "--MaxPathLength number"
672 Maximum atom path length to include in fingerprints. Default value: \fI8\fR. Valid values:
673 positive integers and greater than \fB\-\-MinPathLength\fR.
674 .IP "\fB\-n, \-\-NumOfBitsToSetPerPath\fR \fInumber\fR" 4
675 .IX Item "-n, --NumOfBitsToSetPerPath number"
676 Number of bits to set per path during generation of fingerprints bit-vector string for \fIPathLengthBits\fR
677 value of \fB\-m, \-\-mode\fR option. Default value: \fI1\fR. Valid values: positive integers.
678 .IP "\fB\-\-OutDelim\fR \fIcomma | tab | semicolon\fR" 4
679 .IX Item "--OutDelim comma | tab | semicolon"
680 Delimiter for output \s-1CSV/TSV\s0 text file(s). Possible values: \fIcomma, tab, or semicolon\fR.
681 Default value: \fIcomma\fR.
682 .IP "\fB\-\-output\fR \fI\s-1SD\s0 | \s-1FP\s0 | text | all\fR" 4
683 .IX Item "--output SD | FP | text | all"
684 Type of output files to generate. Possible values: \fI\s-1SD\s0, \s-1FP\s0, text, or all\fR. Default value: \fItext\fR.
685 .IP "\fB\-o, \-\-overwrite\fR" 4
686 .IX Item "-o, --overwrite"
687 Overwrite existing files.
688 .IP "\fB\-p, \-\-PathMode\fR \fIAtomPathsWithoutRings | AtomPathsWithRings | AllAtomPathsWithoutRings | AllAtomPathsWithRings\fR" 4
689 .IX Item "-p, --PathMode AtomPathsWithoutRings | AtomPathsWithRings | AllAtomPathsWithoutRings | AllAtomPathsWithRings"
690 Specify type of atom paths to use for generating pathlength fingerprints for molecules in
691 \&\fISDFile(s)\fR. Possible values: \fIAtomPathsWithoutRings, AtomPathsWithRings,
692 AllAtomPathsWithoutRings, AllAtomPathsWithRings\fR. Default value: \fIAllAtomPathsWithRings\fR.
693 .Sp
694 For molecules with no rings, first two and last two options are equivalent and generate
695 same set of atom paths starting from each atom with length between \fB\-\-MinPathLength\fR
696 and \fB\-\-MaxPathLength\fR. However, all these four options can result in the same set of
697 final atom paths for molecules containing fused, bridged or spiro rings.
698 .Sp
699 For molecules containing rings, atom paths starting from each atom can be traversed in
700 four different ways:
701 .Sp
702 \&\fIAtomPathsWithoutRings\fR \- Atom paths containing no rings and without sharing of bonds
703 in traversed paths.
704 .Sp
705 \&\fIAtomPathsWithRings\fR \- Atom paths containing rings and without any sharing of bonds in
706 traversed paths.
707 .Sp
708 \&\fIAllAtomPathsWithoutRings\fR \- All possible atom paths containing no rings and without any
709 sharing of bonds in traversed paths.
710 .Sp
711 \&\fIAllAtomPathsWithRings\fR \- All possible atom paths containing rings and with sharing of
712 bonds in traversed paths.
713 .Sp
714 Atom path traversal is terminated at the ring atom.
715 .Sp
716 Based on values specified for \fB\-p, \-\-PathMode\fR, \fB\-\-MinPathLength\fR and
717 \&\fB\-\-MaxPathLength\fR, all appropriate atom paths are generated for each atom in the molecule
718 and collected in a list.
719 .Sp
720 For each atom path in the filtered atom paths list, an atom path string is created using value of
721 \&\fB\-a, \-\-AtomIdentifierType\fR and specified values to use for a particular atom identifier type.
722 Value of \fB\-u, \-\-UseBondSymbols\fR controls whether bond order symbols are used during generation
723 of atom path string. Atom symbol corresponds to element symbol and characters used to represent
724 bond order are: \fI1 \- None; 2 \- '='; 3 \- '#'; 1.5 or aromatic \- ':'; others: bond order value\fR. By default,
725 bond symbols are included in atom path strings. Exclusion of bond symbols in atom path strings
726 results in fingerprints which correspond purely to atom paths without considering bonds.
727 .Sp
728 \&\fBUseUniquePaths\fR controls whether structurally duplicate atom path strings are removed
729 from the list.
730 .Sp
731 For \fIPathLengthBits\fR value of \fB\-m, \-\-mode\fR option, each atom path is hashed to a 32 bit unsigned
732 integer key using \fBTextUtil::HashCode\fR function. Using the hash key as a seed for a random number
733 generator, a random integer value between 0 and \fB\-\-Size\fR is used to set corresponding bits
734 in the fingerprint bit-vector string. Value of \fB\-\-NumOfBitsToSetPerPath\fR option controls the number
735 of times a random number is generated to set corresponding bits.
736 .Sp
737 For \fIPathLengthCount\fR value of \fB\-m, \-\-mode\fR option, the number of times an atom path appears
738 is tracked and a fingerprints count-string corresponding to count of atom paths is generated.
739 .Sp
740 For molecules containing rings, combination of \fB\-p, \-\-PathMode\fR and \fB\-\-UseBondSymbols\fR allows
741 generation of up to 8 different types of atom path length strings:
742 .Sp
743 .Vb 1
744 \& AllowSharedBonds AllowRings UseBondSymbols
745 \&
746 \& 0 0 1 \- AtomPathsNoCyclesWithBondSymbols
747 \& 0 1 1 \- AtomPathsWithCyclesWithBondSymbols
748 \&
749 \& 1 0 1 \- AllAtomPathsNoCyclesWithBondSymbols
750 \& 1 1 1 \- AllAtomPathsWithCyclesWithBondSymbols
751 \& [ DEFAULT ]
752 \&
753 \& 0 0 0 \- AtomPathsNoCyclesNoBondSymbols
754 \& 0 1 0 \- AtomPathsWithCyclesNoBondSymbols
755 \&
756 \& 1 0 0 \- AllAtomPathsNoCyclesNoBondSymbols
757 \& 1 1 0 \- AllAtomPathsWithCyclesNoBondSymbols
758 .Ve
759 .Sp
760 Default atom path length fingerprints generation for molecules containing rings with
761 \&\fIAllAtomPathsWithRings\fR value for \fB\-p, \-\-PathMode\fR, \fIYes\fR value for \fB\-\-UseBondSymbols\fR,
762 \&\fI1\fR value for \fB\-\-MinPathLength\fR and \fI8\fR value for \fB\-\-MaxPathLength\fR is the most time
763 consuming. Combinations of other options can substantially speed up fingerprint generation
764 for molecules containing complex ring systems.
765 .Sp
766 Additionally, value for option \fB\-a, \-\-AtomIdentifierType\fR in conjunction with corresponding specified
767 values for atom types changes the nature of atom path length strings and the fingerprints.
768 .IP "\fB\-q, \-\-quote\fR \fIYes | No\fR" 4
769 .IX Item "-q, --quote Yes | No"
770 Put quote around column values in output \s-1CSV/TSV\s0 text file(s). Possible values:
771 \&\fIYes or No\fR. Default value: \fIYes\fR.
772 .IP "\fB\-r, \-\-root\fR \fIRootName\fR" 4
773 .IX Item "-r, --root RootName"
774 New file name is generated using the root: <Root>.<Ext>. Default for new file
775 names: <SDFileName><PathLengthFP>.<Ext>. The file type determines <Ext> value.
776 The sdf, fpf, csv, and tsv <Ext> values are used for \s-1SD\s0, \s-1FP\s0, comma/semicolon, and tab
777 delimited text files, respectively. This option is ignored for multiple input files.
778 .IP "\fB\-s, \-\-size\fR \fInumber\fR" 4
779 .IX Item "-s, --size number"
780 Size of fingerprints. Default value: \fI1024\fR. Valid values correspond to any positive
781 integer which satisfies the following criteria: power of 2, >= 32 and <= 2 ** 32.
782 .Sp
783 Examples:
784 .Sp
785 .Vb 3
786 \& 256
787 \& 512
788 \& 2048
789 .Ve
790 .IP "\fB\-u, \-\-UseBondSymbols\fR \fIYes | No\fR" 4
791 .IX Item "-u, --UseBondSymbols Yes | No"
792 Specify whether to use bond symbols for atom paths during generation of atom path strings.
793 Possible values: \fIYes or No\fR. Default value: \fIYes\fR.
794 .Sp
795 \&\fINo\fR value option for \fB\-u, \-\-UseBondSymbols\fR allows the generation of fingerprints corresponding
796 purely to atoms disregarding all bonds.
797 .IP "\fB\-\-UsePerlCoreRandom\fR \fIYes | No\fR" 4
798 .IX Item "--UsePerlCoreRandom Yes | No"
799 Specify whether to use Perl CORE::rand or MayaChemTools MathUtil::random function
800 during random number generation for setting bits in fingerprints bit-vector strings. Possible
801 values: \fIYes or No\fR. Default value: \fIYes\fR.
802 .Sp
803 \&\fINo\fR value option for \fB\-\-UsePerlCoreRandom\fR allows the generation of fingerprints
804 bit-vector strings which are same across different platforms.
805 .Sp
806 The random number generator implemented in MayaChemTools is a variant of
807 linear congruential generator (\s-1LCG\s0) as described by Miller et al. [ Ref 120 ].
808 It is also referred to as Lehmer random number generator or Park-Miller
809 random number generator.
810 .Sp
811 Unlike Perl's core random number generator function rand, the random number
812 generator implemented in MayaChemTools, MathUtil::random, generates consistent
813 random values across different platforms for a specific random seed and leads
814 to generation of portable fingerprints bit-vector strings.
815 .IP "\fB\-\-UseUniquePaths\fR \fIYes | No\fR" 4
816 .IX Item "--UseUniquePaths Yes | No"
817 Specify whether to use structurally unique atom paths during generation of atom path strings.
818 Possible values: \fIYes or No\fR. Default value: \fIYes\fR.
819 .Sp
820 \&\fINo\fR value option for \fB\-\-UseUniquePaths\fR allows usage of all atom paths generated by
821 \&\fB\-p, \-\-PathMode\fR option value for generation of atom path strings leading to duplicate
822 path count during \fIPathLengthCount\fR value of \fB\-m, \-\-mode\fR option. It doesn't change fingerprint
823 string generated during \fIPathLengthBits\fR value of \fB\-m, \-\-mode\fR.
824 .Sp
825 For example, during \fIAllAtomPathsWithRings\fR value of \fB\-p, \-\-PathMode\fR option, benzene has
826 12 linear paths of length 2 and 12 cyclic paths of length 7, but only 6 linear paths of length 2 and
827 1 cyclic path of length 7 are structurally unique.
828 .IP "\fB\-v, \-\-VectorStringFormat\fR \fIIDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString\fR" 4
829 .IX Item "-v, --VectorStringFormat IDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString"
830 Format of fingerprints vector string data in output \s-1SD\s0, \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) specified by
831 \&\fB\-\-output\fR used during \fIPathLengthCount\fR value of \fB\-m, \-\-mode\fR option. Possible
832 values: \fIIDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString |
833 ValuesAndIDsPairsString\fR. Default value: \fIIDsAndValuesString\fR.
834 .Sp
835 Examples:
836 .Sp
837 .Vb 6
838 \& FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength
839 \& 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2
840 \& C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X
841 \& 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1
842 \& 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO
843 \& 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C....
844 \&
845 \& FingerprintsVector;PathLengthCount:EStateAtomTypes:MinLength1:MaxLengt
846 \& h8;454;NumericalValues;IDsAndValuesPairsString;aaCH 14 aasC 8 aasN 1 d
847 \& O 2 dssC 2 sCH3 2 sF 1 sOH 3 ssCH2 4 ssNH 1 sssCH 3 aaCH:aaCH 10 aaCH:
848 \& aasC 8 aasC:aasC 3 aasC:aasN 2 aasCaasC 2 aasCdssC 1 aasCsF 1 aasCssNH
849 \& 1 aasCsssCH 1 aasNssCH2 1 dO=dssC 2 dssCsOH 1 dssCssCH2 1 dssCssNH 1
850 \& sCH3sssCH 2 sOHsssCH 2 ssCH2ssCH2 1 ssCH2sssCH 4 aaCH:aaCH:aaCH 6 a...
851 .Ve
852 .IP "\fB\-w, \-\-WorkingDir\fR \fIDirName\fR" 4
853 .IX Item "-w, --WorkingDir DirName"
854 Location of working directory. Default: current directory.
855 .SH "EXAMPLES"
856 .IX Header "EXAMPLES"
857 To generate path length fingerprints corresponding to all unique paths from length 1
858 through 8 in hexadecimal bit-vector string format of size 1024 and create a
859 SamplePLFPHex.csv file containing sequential compound IDs along with fingerprints
860 bit-vector strings data, type:
861 .PP
862 .Vb 1
863 \& % PathLengthFingerprints.pl \-o \-r SamplePLFPHex Sample.sdf
864 .Ve
865 .PP
866 To generate path length fingerprints corresponding to all unique paths from length 1
867 through 8 in hexadecimal bit-vector string format of size 1024 and create SamplePLFPHex.sdf,
868 SamplePLFPHex.fpf, and SamplePLFPHex.csv files containing sequential compound IDs
869 in \s-1CSV\s0 file along with fingerprints bit-vector strings data, type:
870 .PP
871 .Vb 1
872 \& % PathLengthFingerprints.pl \-\-output all \-o \-r SamplePLFPHex Sample.sdf
873 .Ve
874 .PP
875 To generate path length fingerprints corresponding to all unique paths from length 1
876 through 8 in binary bit-vector string format of size 2048 and create a
877 SamplePLFPBin.csv file containing sequential compound IDs along with fingerprints
878 bit-vector strings data, type:
879 .PP
880 .Vb 2
881 \& % PathLengthFingerprints.pl \-\-BitStringFormat BinaryString \-\-size 2048
882 \& \-o \-r SamplePLFPBin Sample.sdf
883 .Ve
884 .PP
885 To generate path length fingerprints corresponding to count of all unique paths from
886 length 1 through 8 in IDsAndValuesString format and create a SamplePLFPCount.csv file
887 containing sequential compound IDs along with fingerprints vector strings data, type:
888 .PP
889 .Vb 2
890 \& % PathLengthFingerprints.pl \-m PathLengthCount \-o \-r SamplePLFPCount
891 \& Sample.sdf
892 .Ve
893 .PP
894 To generate path length fingerprints corresponding to count of all unique paths from
895 length 1 through 8 in IDsAndValuesString format using E\-state atom types and
896 create a SamplePLFPCount.csv file containing sequential compound IDs along with fingerprints
897 vector strings data, type:
898 .PP
899 .Vb 2
900 \& % PathLengthFingerprints.pl \-m PathLengthCount \-\-AtomIdentifierType
901 \& EStateAtomTypes \-o \-r SamplePLFPCount Sample.sdf
902 .Ve
903 .PP
904 To generate path length fingerprints corresponding to count of all unique paths from
905 length 1 through 8 in IDsAndValuesString format using SLogP atom types and
906 create a SamplePLFPCount.csv file containing sequential compound IDs along with fingerprints
907 vector strings data, type:
908 .PP
909 .Vb 2
910 \& % PathLengthFingerprints.pl \-m PathLengthCount \-\-AtomIdentifierType
911 \& SLogPAtomTypes \-o \-r SamplePLFPCount Sample.sdf
912 .Ve
913 .PP
914 To generate path length fingerprints corresponding to count of all unique paths from
915 length 1 through 8 in ValuesAndIDsPairsString format and create a SamplePLFPCount.csv file
916 containing sequential compound IDs along with fingerprints vector strings data, type:
917 .PP
918 .Vb 2
919 \& % PathLengthFingerprints.pl \-m PathLengthCount \-\-VectorStringFormat
920 \& ValuesAndIDsPairsString \-o \-r SamplePLFPCount Sample.sdf
921 .Ve
922 .PP
923 To generate path length fingerprints corresponding to count of all unique paths from
924 length 1 through 8 in IDsAndValuesString format using \s-1AS\s0,X,BO as atomic invariants and
925 create a SamplePLFPCount.csv file containing sequential compound IDs along with fingerprints
926 vector strings data, type:
927 .PP
928 .Vb 3
929 \& % PathLengthFingerprints.pl \-m PathLengthCount \-\-AtomIdentifierType
930 \& AtomicInvariantsAtomTypes \-\-AtomicInvariantsToUse "AS,X,BO" \-o
931 \& \-r SamplePLFPCount Sample.sdf
932 .Ve
933 .PP
934 To generate path length fingerprints corresponding to count of all paths from
935 length 1 through 8 in IDsAndValuesString format and create a SamplePLFPCount.csv file
936 containing compound IDs from MolName line along with fingerprints vector strings data, type:
937 .PP
938 .Vb 3
939 \& % PathLengthFingerprints.pl \-m PathLengthCount \-\-UseUniquePaths No
940 \& \-o \-\-CompoundIDMode MolName \-r SamplePLFPCount \-\-UseUniquePaths No
941 \& Sample.sdf
942 .Ve
943 .PP
944 To generate path length fingerprints corresponding to all unique paths from length 1
945 through 8 in hexadecimal bit-vector string format of size 512 after folding and create
946 SamplePLFPHex.sdf, SamplePLFPHex.fpf, and SamplePLFPHex.csv files containing sequential
947 compound IDs along with fingerprints bit-vector strings data, type:
948 .PP
949 .Vb 2
950 \& % PathLengthFingerprints.pl \-\-output all \-\-Fold Yes \-\-FoldedSize 512
951 \& \-o \-r SamplePLFPHex Sample.sdf
952 .Ve
953 .PP
954 To generate path length fingerprints corresponding to all unique paths from length 1
955 through 8 containing no rings and without sharing of bonds in hexadecimal bit-vector
956 string format of size 1024 and create a SamplePLFPHex.csv file containing sequential
957 compound IDs along with fingerprints bit-vector strings data and all data fields, type:
958 .PP
959 .Vb 2
960 \& % PathLengthFingerprints.pl \-p AtomPathsWithoutRings \-\-DataFieldsMode All
961 \& \-o \-r SamplePLFPHex Sample.sdf
962 .Ve
963 .PP
964 To generate path length fingerprints corresponding to all unique paths from length 1
965 through 8 containing rings and without sharing of bonds in hexadecimal bit-vector
966 string format of size 1024 and create a SamplePLFPHex.tsv file containing compound IDs
967 derived from combination of molecule name line and an explicit compound prefix
968 along with fingerprints bit-vector strings data and all data fields, type:
969 .PP
970 .Vb 4
971 \& % PathLengthFingerprints.pl \-p AtomPathsWithRings \-\-DataFieldsMode
972 \& CompoundID \-\-CompoundIDMode MolnameOrLabelPrefix \-\-CompoundID Cmpd
973 \& \-\-CompoundIDLabel MolID \-\-FingerprintsLabel PathLengthFP \-\-OutDelim Tab
974 \& \-r SamplePLFPHex \-o Sample.sdf
975 .Ve
976 .PP
977 To generate path length fingerprints corresponding to count of all unique paths from
978 length 1 through 8 in IDsAndValuesString format and create a SamplePLFPCount.csv file
979 containing sequential compound IDs along with fingerprints vector strings data using
980 aromaticity specified in \s-1SD\s0 file, type:
981 .PP
982 .Vb 2
983 \& % PathLengthFingerprints.pl \-m PathLengthCount \-\-DetectAromaticity No
984 \& \-o \-r SamplePLFPCount Sample.sdf
985 .Ve
986 .PP
987 To generate path length fingerprints corresponding to all unique paths from length 2
988 through 6 in hexadecimal bit-vector string format of size 1024 and create a
989 SamplePLFPHex.csv file containing sequential compound IDs along with fingerprints
990 bit-vector strings data, type:
991 .PP
992 .Vb 2
993 \& % PathLengthFingerprints.pl \-\-MinPathLength 2 \-\-MaxPathLength 6
994 \& \-o \-r SamplePLFPHex Sample.sdf
995 .Ve
996 .SH "AUTHOR"
997 .IX Header "AUTHOR"
998 Manish Sud <msud@san.rr.com>
999 .SH "SEE ALSO"
1000 .IX Header "SEE ALSO"
1001 InfoFingerprintsFiles.pl, SimilarityMatricesFingerprints.pl, AtomNeighborhoodsFingerprints.pl,
1002 ExtendedConnectivityFingerprints.pl, MACCSKeysFingerprints.pl,
1003 TopologicalAtomPairsFingerprints.pl, TopologicalAtomTorsionsFingerprints.pl,
1004 TopologicalPharmacophoreAtomPairsFingerprints.pl, TopologicalPharmacophoreAtomTripletsFingerprints.pl
1005 .SH "COPYRIGHT"
1006 .IX Header "COPYRIGHT"
1007 Copyright (C) 2015 Manish Sud. All rights reserved.
1008 .PP
1009 This file is part of MayaChemTools.
1010 .PP
1011 MayaChemTools is free software; you can redistribute it and/or modify it under
1012 the terms of the \s-1GNU\s0 Lesser General Public License as published by the Free
1013 Software Foundation; either version 3 of the License, or (at your option)
1014 any later version.