Mercurial > repos > deepakjadmin > mayatool3_test3
comparison mayachemtools/docs/scripts/man1/ExtendedConnectivityFingerprints.1 @ 0:73ae111cf86f draft
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 11:55:01 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:73ae111cf86f |
---|---|
1 .\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.22) | |
2 .\" | |
3 .\" Standard preamble: | |
4 .\" ======================================================================== | |
5 .de Sp \" Vertical space (when we can't use .PP) | |
6 .if t .sp .5v | |
7 .if n .sp | |
8 .. | |
9 .de Vb \" Begin verbatim text | |
10 .ft CW | |
11 .nf | |
12 .ne \\$1 | |
13 .. | |
14 .de Ve \" End verbatim text | |
15 .ft R | |
16 .fi | |
17 .. | |
18 .\" Set up some character translations and predefined strings. \*(-- will | |
19 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
20 .\" double quote, and \*(R" will give a right double quote. \*(C+ will | |
21 .\" give a nicer C++. Capital omega is used to do unbreakable dashes and | |
22 .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, | |
23 .\" nothing in troff, for use with C<>. | |
24 .tr \(*W- | |
25 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' | |
26 .ie n \{\ | |
27 . ds -- \(*W- | |
28 . ds PI pi | |
29 . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
30 . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
31 . ds L" "" | |
32 . ds R" "" | |
33 . ds C` "" | |
34 . ds C' "" | |
35 'br\} | |
36 .el\{\ | |
37 . ds -- \|\(em\| | |
38 . ds PI \(*p | |
39 . ds L" `` | |
40 . ds R" '' | |
41 'br\} | |
42 .\" | |
43 .\" Escape single quotes in literal strings from groff's Unicode transform. | |
44 .ie \n(.g .ds Aq \(aq | |
45 .el .ds Aq ' | |
46 .\" | |
47 .\" If the F register is turned on, we'll generate index entries on stderr for | |
48 .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index | |
49 .\" entries marked with X<> in POD. Of course, you'll have to process the | |
50 .\" output yourself in some meaningful fashion. | |
51 .ie \nF \{\ | |
52 . de IX | |
53 . tm Index:\\$1\t\\n%\t"\\$2" | |
54 .. | |
55 . nr % 0 | |
56 . rr F | |
57 .\} | |
58 .el \{\ | |
59 . de IX | |
60 .. | |
61 .\} | |
62 .\" | |
63 .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). | |
64 .\" Fear. Run. Save yourself. No user-serviceable parts. | |
65 . \" fudge factors for nroff and troff | |
66 .if n \{\ | |
67 . ds #H 0 | |
68 . ds #V .8m | |
69 . ds #F .3m | |
70 . ds #[ \f1 | |
71 . ds #] \fP | |
72 .\} | |
73 .if t \{\ | |
74 . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
75 . ds #V .6m | |
76 . ds #F 0 | |
77 . ds #[ \& | |
78 . ds #] \& | |
79 .\} | |
80 . \" simple accents for nroff and troff | |
81 .if n \{\ | |
82 . ds ' \& | |
83 . ds ` \& | |
84 . ds ^ \& | |
85 . ds , \& | |
86 . ds ~ ~ | |
87 . ds / | |
88 .\} | |
89 .if t \{\ | |
90 . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
91 . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
92 . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
93 . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
94 . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
95 . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
96 .\} | |
97 . \" troff and (daisy-wheel) nroff accents | |
98 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
99 .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
100 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
101 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
102 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
103 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
104 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
105 .ds ae a\h'-(\w'a'u*4/10)'e | |
106 .ds Ae A\h'-(\w'A'u*4/10)'E | |
107 . \" corrections for vroff | |
108 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
109 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
110 . \" for low resolution devices (crt and lpr) | |
111 .if \n(.H>23 .if \n(.V>19 \ | |
112 \{\ | |
113 . ds : e | |
114 . ds 8 ss | |
115 . ds o a | |
116 . ds d- d\h'-1'\(ga | |
117 . ds D- D\h'-1'\(hy | |
118 . ds th \o'bp' | |
119 . ds Th \o'LP' | |
120 . ds ae ae | |
121 . ds Ae AE | |
122 .\} | |
123 .rm #[ #] #H #V #F C | |
124 .\" ======================================================================== | |
125 .\" | |
126 .IX Title "EXTENDEDCONNECTIVITYFINGERPRINTS 1" | |
127 .TH EXTENDEDCONNECTIVITYFINGERPRINTS 1 "2015-03-29" "perl v5.14.2" "MayaChemTools" | |
128 .\" For nroff, turn off justification. Always turn off hyphenation; it makes | |
129 .\" way too many mistakes in technical documents. | |
130 .if n .ad l | |
131 .nh | |
132 .SH "NAME" | |
133 ExtendedConnectivityFingerprints.pl \- Generate extended connectivity fingerprints for SD files | |
134 .SH "SYNOPSIS" | |
135 .IX Header "SYNOPSIS" | |
136 ExtendedConnectivityFingerprints.pl SDFile(s)... | |
137 .PP | |
138 ExtendedConnectivityFingerprints.pl [\fB\-\-AromaticityModel\fR \fIAromaticityModelType\fR] | |
139 [\fB\-a, \-\-AtomIdentifierType\fR \fIAtomicInvariantsAtomTypes\fR] | |
140 [\fB\-\-AtomicInvariantsToUse\fR \fI\*(L"AtomicInvariant,AtomicInvariant...\*(R"\fR] | |
141 [\fB\-\-FunctionalClassesToUse\fR \fI\*(L"FunctionalClass1,FunctionalClass2...\*(R"\fR] | |
142 [\fB\-\-BitsOrder\fR \fIAscending | Descending\fR] [\fB\-b, \-\-BitStringFormat\fR \fIBinaryString | HexadecimalString\fR] | |
143 [\fB\-\-CompoundID\fR \fIDataFieldName or LabelPrefixString\fR] [\fB\-\-CompoundIDLabel\fR \fItext\fR] | |
144 [\fB\-\-CompoundIDMode\fR] [\fB\-\-DataFields\fR \fI\*(L"FieldLabel1,FieldLabel2,...\*(R"\fR] | |
145 [\fB\-d, \-\-DataFieldsMode\fR \fIAll | Common | Specify | CompoundID\fR] [\fB\-f, \-\-Filter\fR \fIYes | No\fR] | |
146 [\fB\-\-FingerprintsLabel\fR \fItext\fR] [\fB\-h, \-\-help\fR] [\fB\-k, \-\-KeepLargestComponent\fR \fIYes | No\fR] | |
147 [\fB\-m, \-\-mode\fR \fIExtendedConnectivity | ExtendedConnectivityCount | ExtendedConnectivityBits\fR] | |
148 [\fB\-n, \-\-NeighborhoodRadius\fR \fInumber\fR] [\fB\-\-OutDelim\fR \fIcomma | tab | semicolon\fR] [\fB\-\-output\fR \fI\s-1SD\s0 | \s-1FP\s0 | text | all\fR] | |
149 [\fB\-o, \-\-overwrite\fR] [\fB\-q, \-\-quote\fR \fIYes | No\fR] [\fB\-r, \-\-root\fR \fIRootName\fR] [\fB\-s, \-\-size\fR \fInumber\fR] | |
150 [\fB\-\-UsePerlCoreRandom\fR \fIYes | No\fR] | |
151 [\fB\-v, \-\-VectorStringFormat\fR \fIIDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString\fR] | |
152 [\fB\-w, \-\-WorkingDir\fR dirname] SDFile(s)... | |
153 .SH "DESCRIPTION" | |
154 .IX Header "DESCRIPTION" | |
155 Generate extended connectivity fingerprints [ Ref 48, Ref 52 ] for \fISDFile(s)\fR and create appropriate | |
156 \&\s-1SD\s0, \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) containing fingerprints vector strings corresponding to molecular fingerprints. | |
157 .PP | |
158 Multiple SDFile names are separated by spaces. The valid file extensions are \fI.sdf\fR | |
159 and \fI.sd\fR. All other file names are ignored. All the \s-1SD\s0 files in a current directory | |
160 can be specified either by \fI*.sdf\fR or the current directory name. | |
161 .PP | |
162 The current release of MayaChemTools supports generation of extended connectivity fingerprints | |
163 corresponding to the following \fB\-a, \-\-AtomIdentifierType\fR values: | |
164 .PP | |
165 .Vb 3 | |
166 \& AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, | |
167 \& FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, | |
168 \& SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes | |
169 .Ve | |
170 .PP | |
171 Based on values specified for \fB\-a, \-\-AtomIdentifierType\fR, \fB\-\-AtomicInvariantsToUse\fR | |
172 and \fB\-\-FunctionalClassesToUse\fR, initial atom types are assigned to all non-hydrogen atoms in | |
173 a molecule and these atom types strings are converted into initial atom identifier integers using | |
174 \&\fBTextUtil::HashCode\fR function. The duplicate atom identifiers are removed. | |
175 .PP | |
176 For \fB\-n, \-\-NeighborhoodRadius\fR value of \fI0\fR, the initial set of unique atom identifiers comprises | |
177 the molecule fingerprints. Otherwise, atom neighborhoods are generated for each non-hydrogen | |
178 atom up to specified \fB\-n, \-\-NeighborhoodRadius\fR value. For each non-hydrogen central atom | |
179 at a specific radius, its neighbors at next radius level along with their bond orders and previously | |
180 calculated atom identifiers are collected which in turn are used to generate a new integer | |
181 atom identifier; the bond orders and atom identifier pairs list is first sorted by bond order | |
182 followed by atom identifiers to make these values graph invariant. | |
183 .PP | |
184 After integer atom identifiers have been generated for all non-hydrogen atoms at all specified | |
185 neighborhood radii, the duplicate integer atom identifiers corresponding to same hash code | |
186 value generated using \fBTextUtil::HashCode\fR are tracked by keeping the atom identifiers at | |
187 lower radius. Additionally, all structurally duplicate integer atom identifiers at each specified | |
188 radius are also tracked by identifying equivalent atoms and bonds corresponding to substructures | |
189 used for generating atom identifier and keeping integer atom identifier with lowest value. | |
190 .PP | |
191 For \fIExtendedConnectivity\fR value of fingerprints \fB\-m, \-\-mode\fR, the duplicate identifiers are | |
192 removed from the list and the unique atom identifiers constitute the extended connectivity | |
193 fingerprints of a molecule. | |
194 .PP | |
195 For \fIExtendedConnectivityCount\fR value of fingerprints \fB\-m, \-\-mode\fR, the occurrences of each | |
196 unique atom identifier are counted and the unique atom identifiers along with their | |
197 count constitute the extended connectivity fingerprints of a molecule. | |
198 .PP | |
199 For \fIExtendedConnectivityBits\fR value of fingerprints \fB\-m, \-\-mode\fR, the unique atom identifiers | |
200 are used as a random number seed to generate a random integer value between 0 and \fB\-s, \-\-size\fR which | |
201 in turn is used to set corresponding bits in the fingerprint bit-vector string. | |
202 .PP | |
203 Example of \fI\s-1SD\s0\fR file containing extended connectivity fingerprints string data: | |
204 .PP | |
205 .Vb 10 | |
206 \& ... ... | |
207 \& ... ... | |
208 \& $$$$ | |
209 \& ... ... | |
210 \& ... ... | |
211 \& ... ... | |
212 \& 41 44 0 0 0 0 0 0 0 0999 V2000 | |
213 \& \-3.3652 1.4499 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 | |
214 \& ... ... | |
215 \& 2 3 1 0 0 0 0 | |
216 \& ... ... | |
217 \& M END | |
218 \& > <CmpdID> | |
219 \& Cmpd1 | |
220 \& | |
221 \& > <ExtendedConnectivityFingerprints> | |
222 \& FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radiu | |
223 \& s2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391 66 | |
224 \& 6191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414087 | |
225 \& 99 49532520 64643108 79385615 96062769 273726379 564565671 855141035 90 | |
226 \& 6706094 988546669 1018231313 1032696425 1197507444 1331250018 133853... | |
227 \& | |
228 \& $$$$ | |
229 \& ... ... | |
230 \& ... ... | |
231 .Ve | |
232 .PP | |
233 Example of \fI\s-1FP\s0\fR file containing extended connectivity fingerprints string data: | |
234 .PP | |
235 .Vb 10 | |
236 \& # | |
237 \& # Package = MayaChemTools 7.4 | |
238 \& # Release Date = Oct 21, 2010 | |
239 \& # | |
240 \& # TimeStamp = Fri Mar 11 14:43:57 2011 | |
241 \& # | |
242 \& # FingerprintsStringType = FingerprintsVector | |
243 \& # | |
244 \& # Description = ExtendedConnectivity:AtomicInvariantsAtomTypes:Radius2 | |
245 \& # VectorStringFormat = ValuesString | |
246 \& # VectorValuesType = AlphaNumericalValues | |
247 \& # | |
248 \& Cmpd1 60;73555770 333564680 352413391 666191900 1001270906 137167432... | |
249 \& Cmpd2 41;73555770 333564680 666191900 1142173602 1363635752 14814699... | |
250 \& ... ... | |
251 \& ... ... | |
252 .Ve | |
253 .PP | |
254 Example of \s-1CSV\s0 \fIText\fR file containing extended connectivity fingerprints string data: | |
255 .PP | |
256 .Vb 8 | |
257 \& "CompoundID","ExtendedConnectivityFingerprints" | |
258 \& "Cmpd1","FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTy | |
259 \& pes:Radius2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352 | |
260 \& 413391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 | |
261 \& 2141408799 49532520 64643108 79385615 96062769 273726379 564565671 8551 | |
262 \& 41035 906706094 988546669 1018231313 1032696425 1197507444 13312500..." | |
263 \& ... ... | |
264 \& ... ... | |
265 .Ve | |
266 .PP | |
267 The current release of MayaChemTools generates the following types of extended connectivity | |
268 fingerprints vector strings: | |
269 .PP | |
270 .Vb 6 | |
271 \& FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radi | |
272 \& us2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391 | |
273 \& 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414 | |
274 \& 08799 49532520 64643108 79385615 96062769 273726379 564565671 85514103 | |
275 \& 5 906706094 988546669 1018231313 1032696425 1197507444 1331250018 1338 | |
276 \& 532734 1455473691 1607485225 1609687129 1631614296 1670251330 17303... | |
277 \& | |
278 \& FingerprintsVector;ExtendedConnectivityCount:AtomicInvariantsAtomTypes | |
279 \& :Radius2;60;NumericalValues;IDsAndValuesString;73555770 333564680 3524 | |
280 \& 13391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 | |
281 \& 2141408799 49532520 64643108 79385615 96062769 273726379 564565671...; | |
282 \& 3 2 1 1 14 1 2 10 4 3 1 1 1 1 2 1 2 1 1 1 2 3 1 1 2 1 3 3 8 2 2 2 6 2 | |
283 \& 1 2 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1 | |
284 \& | |
285 \& FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp | |
286 \& es:Radius2;1024;BinaryString;Ascending;0000000000000000000000000000100 | |
287 \& 0000000001010000000110000011000000000000100000000000000000000000100001 | |
288 \& 1000000110000000000000000000000000010011000000000000000000000000010000 | |
289 \& 0000000000000000000000000010000000000000000001000000000000000000000000 | |
290 \& 0000000000010000100001000000000000101000000000000000100000000000000... | |
291 \& | |
292 \& FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp | |
293 \& es:Radius2;1024;HexadecimalString;Ascending;000000010050c0600800000803 | |
294 \& 0300000091000004000000020000100000000124008200020000000040020000000000 | |
295 \& 2080000000820040010020000000008040000000000080001000000000400000000000 | |
296 \& 4040000090000061010000000800200000000000001400000000020080000000000020 | |
297 \& 00008020200000408000 | |
298 \& | |
299 \& FingerprintsVector;ExtendedConnectivity:FunctionalClassAtomTypes:Radiu | |
300 \& s2;57;AlphaNumericalValues;ValuesString;24769214 508787397 850393286 8 | |
301 \& 62102353 981185303 1231636850 1649386610 1941540674 263599683 32920567 | |
302 \& 1 571109041 639579325 683993318 723853089 810600886 885767127 90326012 | |
303 \& 7 958841485 981022393 1126908698 1152248391 1317567065 1421489994 1455 | |
304 \& 632544 1557272891 1826413669 1983319256 2015750777 2029559552 20404... | |
305 \& | |
306 \& FingerprintsVector;ExtendedConnectivityCount:FunctionalClassAtomTypes: | |
307 \& Radius2;57;NumericalValues;IDsAndValuesString;24769214 508787397 85039 | |
308 \& 3286 862102353 981185303 1231636850 1649386610 1941540674 263599683 32 | |
309 \& 9205671 571109041 639579325 683993318 723853089 810600886 885767127...; | |
310 \& 1 1 1 10 2 22 3 1 3 3 1 1 1 3 2 2 1 2 2 2 3 1 1 1 1 1 14 1 1 1 1 1 1 2 | |
311 \& 1 2 1 1 2 2 1 1 2 1 1 1 2 1 1 2 1 1 1 1 1 1 1 | |
312 \& | |
313 \& FingerprintsBitVector;ExtendedConnectivityBits:FunctionalClassAtomType | |
314 \& s:Radius2;1024;BinaryString;Ascending;00000000000000000000100000000000 | |
315 \& 0000000001000100000000001000000000000000000000000000000000101000000010 | |
316 \& 0000001000000000010000000000000000000000000000000000000000000000000100 | |
317 \& 0000000000001000000000000001000000000001001000000000000000000000000000 | |
318 \& 0000000000000000100000000000001000000000000000000000000000000000000... | |
319 \& | |
320 \& FingerprintsVector;ExtendedConnectivity:DREIDINGAtomTypes:Radius2;56;A | |
321 \& lphaNumericalValues;ValuesString;280305427 357928343 721790579 1151822 | |
322 \& 898 1207111054 1380963747 1568213839 1603445250 4559268 55012922 18094 | |
323 \& 0813 335715751 534801009 684609658 829361048 972945982 999881534 10076 | |
324 \& 55741 1213692591 1222032501 1224517934 1235687794 1244268533 152812070 | |
325 \& 0 1629595024 1856308891 1978806036 2001865095 2096549435 172675415 ... | |
326 \& | |
327 \& FingerprintsVector;ExtendedConnectivity:EStateAtomTypes:Radius2;62;Alp | |
328 \& haNumericalValues;ValuesString;25189973 528584866 662581668 671034184 | |
329 \& 926543080 1347067490 1738510057 1759600920 2034425745 2097234755 21450 | |
330 \& 44754 96779665 180364292 341712110 345278822 386540408 387387308 50430 | |
331 \& 1706 617094135 771528807 957666640 997798220 1158349170 1291258082 134 | |
332 \& 1138533 1395329837 1420277211 1479584608 1486476397 1487556246 1566... | |
333 \& | |
334 \& FingerprintsVector;ExtendedConnectivity:MMFF94AtomTypes:Radius2;64;Alp | |
335 \& haNumericalValues;ValuesString;224051550 746527773 998750766 103704190 | |
336 \& 2 1239701709 1248384926 1259447756 1521678386 1631549126 1909437580 20 | |
337 \& 37095052 2104274756 2117729376 8770364 31445800 81450228 314289324 344 | |
338 \& 041929 581773587 638555787 692022098 811840536 929651561 936421792 988 | |
339 \& 636432 1048624296 1054288509 1369487579 1454058929 1519352190 17271... | |
340 \& | |
341 \& FingerprintsVector;ExtendedConnectivity:SLogPAtomTypes:Radius2;71;Alph | |
342 \& aNumericalValues;ValuesString;78989290 116507218 489454042 888737940 1 | |
343 \& 162561799 1241797255 1251494264 1263717127 1471206899 1538061784 17654 | |
344 \& 07295 1795036542 1809833874 2020454493 2055310842 2117729376 11868981 | |
345 \& 56731842 149505242 184525155 196984339 288181334 481409282 556716568 6 | |
346 \& 41915747 679881756 721736571 794256218 908276640 992898760 10987549... | |
347 \& | |
348 \& FingerprintsVector;ExtendedConnectivity:SYBYLAtomTypes:Radius2;58;Alph | |
349 \& aNumericalValues;ValuesString;199957044 313356892 455463968 465982819 | |
350 \& 1225318176 1678585943 1883366064 1963811677 2117729376 113784599 19153 | |
351 \& 8837 196629033 263865277 416380653 477036669 681527491 730724924 90906 | |
352 \& 5537 1021959189 1133014972 1174311016 1359441203 1573452838 1661585138 | |
353 \& 1668649038 1684198062 1812312554 1859266290 1891651106 2072549404 ... | |
354 \& | |
355 \& FingerprintsVector;ExtendedConnectivity:TPSAAtomTypes:Radius2;47;Alpha | |
356 \& NumericalValues;ValuesString;20818206 259344053 862102353 1331904542 1 | |
357 \& 700688206 265614156 363161397 681332588 810600886 885767127 950172500 | |
358 \& 951454814 1059668746 1247054493 1382302230 1399502637 1805025917 19189 | |
359 \& 39561 2114677228 2126402271 8130483 17645742 32278373 149975755 160327 | |
360 \& 654 256360355 279492740 291251259 317592700 333763396 972105960 101... | |
361 \& | |
362 \& FingerprintsVector;ExtendedConnectivity:UFFAtomTypes:Radius2;56;AlphaN | |
363 \& umericalValues;ValuesString;280305427 357928343 721790579 1151822898 1 | |
364 \& 207111054 1380963747 1568213839 1603445250 4559268 55012922 180940813 | |
365 \& 335715751 534801009 684609658 829361048 972945982 999881534 1007655741 | |
366 \& 1213692591 1222032501 1224517934 1235687794 1244268533 1528120700 162 | |
367 \& 9595024 1856308891 1978806036 2001865095 2096549435 172675415 18344... | |
368 .Ve | |
369 .SH "OPTIONS" | |
370 .IX Header "OPTIONS" | |
371 .IP "\fB\-\-AromaticityModel\fR \fIMDLAromaticityModel | TriposAromaticityModel | MMFFAromaticityModel | ChemAxonBasicAromaticityModel | ChemAxonGeneralAromaticityModel | DaylightAromaticityModel | MayaChemToolsAromaticityModel\fR" 4 | |
372 .IX Item "--AromaticityModel MDLAromaticityModel | TriposAromaticityModel | MMFFAromaticityModel | ChemAxonBasicAromaticityModel | ChemAxonGeneralAromaticityModel | DaylightAromaticityModel | MayaChemToolsAromaticityModel" | |
373 Specify aromaticity model to use during detection of aromaticity. Possible values in the current | |
374 release are: \fIMDLAromaticityModel, TriposAromaticityModel, MMFFAromaticityModel, | |
375 ChemAxonBasicAromaticityModel, ChemAxonGeneralAromaticityModel, DaylightAromaticityModel | |
376 or MayaChemToolsAromaticityModel\fR. Default value: \fIMayaChemToolsAromaticityModel\fR. | |
377 .Sp | |
378 The supported aromaticity model names along with model specific control parameters | |
379 are defined in \fBAromaticityModelsData.csv\fR, which is distributed with the current release | |
380 and is available under \fBlib/data\fR directory. \fBMolecule.pm\fR module retrieves data from | |
381 this file during class instantiation and makes it available to method \fBDetectAromaticity\fR | |
382 for detecting aromaticity corresponding to a specific model. | |
383 .IP "\fB\-a, \-\-AtomIdentifierType\fR \fIAtomicInvariantsAtomTypes | FunctionalClassAtomTypes | DREIDINGAtomTypes | EStateAtomTypes | MMFF94AtomTypes | SLogPAtomTypes | SYBYLAtomTypes | TPSAAtomTypes | UFFAtomTypes\fR" 4 | |
384 .IX Item "-a, --AtomIdentifierType AtomicInvariantsAtomTypes | FunctionalClassAtomTypes | DREIDINGAtomTypes | EStateAtomTypes | MMFF94AtomTypes | SLogPAtomTypes | SYBYLAtomTypes | TPSAAtomTypes | UFFAtomTypes" | |
385 Specify atom identifier type to use for assignment of initial atom identifier to non-hydrogen | |
386 atoms during calculation of extended connectivity fingerprints [ Ref 48, Ref 52 ]. Possible values | |
387 in the current release are: \fIAtomicInvariantsAtomTypes, FunctionalClassAtomTypes, | |
388 DREIDINGAtomTypes, EStateAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, | |
389 TPSAAtomTypes, UFFAtomTypes\fR. Default value: \fIAtomicInvariantsAtomTypes\fR. | |
390 .ie n .IP "\fB\-\-AtomicInvariantsToUse\fR \fI""AtomicInvariant,AtomicInvariant...""\fR" 4 | |
391 .el .IP "\fB\-\-AtomicInvariantsToUse\fR \fI``AtomicInvariant,AtomicInvariant...''\fR" 4 | |
392 .IX Item "--AtomicInvariantsToUse AtomicInvariant,AtomicInvariant..." | |
393 This value is used during \fIAtomicInvariantsAtomTypes\fR value of \fB\-a, \-\-AtomIdentifierType\fR | |
394 option. It's a list of comma separated valid atomic invariant atom types. | |
395 .Sp | |
396 Possible values for atomic invariants are: \fI\s-1AS\s0, X, \s-1BO\s0, \s-1LBO\s0, \s-1SB\s0, \s-1DB\s0, \s-1TB\s0, | |
397 H, Ar, \s-1RA\s0, \s-1FC\s0, \s-1MN\s0, \s-1SM\s0\fR. Default value [ Ref 24 ]: \fI\s-1AS\s0,X,BO,H,FC,MN\fR. | |
398 .Sp | |
399 The atomic invariants abbreviations correspond to: | |
400 .Sp | |
401 .Vb 1 | |
402 \& AS = Atom symbol corresponding to element symbol | |
403 \& | |
404 \& X<n> = Number of non\-hydrogen atom neighbors or heavy atoms | |
405 \& BO<n> = Sum of bond orders to non\-hydrogen atom neighbors or heavy atoms | |
406 \& LBO<n> = Largest bond order of non\-hydrogen atom neighbors or heavy atoms | |
407 \& SB<n> = Number of single bonds to non\-hydrogen atom neighbors or heavy atoms | |
408 \& DB<n> = Number of double bonds to non\-hydrogen atom neighbors or heavy atoms | |
409 \& TB<n> = Number of triple bonds to non\-hydrogen atom neighbors or heavy atoms | |
410 \& H<n> = Number of implicit and explicit hydrogens for atom | |
411 \& Ar = Aromatic annotation indicating whether atom is aromatic | |
412 \& RA = Ring atom annotation indicating whether atom is in a ring | |
413 \& FC<+n/\-n> = Formal charge assigned to atom | |
414 \& MN<n> = Mass number indicating isotope other than most abundant isotope | |
415 \& SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or | |
416 \& 3 (triplet) | |
417 .Ve | |
418 .Sp | |
419 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to: | |
420 .Sp | |
421 .Vb 1 | |
422 \& AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/\-n>.MN<n>.SM<n> | |
423 .Ve | |
424 .Sp | |
425 Except for \s-1AS\s0 which is a required atomic invariant in atom types, all other atomic invariants are | |
426 optional. Atom type specification doesn't include atomic invariants with zero or undefined values. | |
427 .Sp | |
428 In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words | |
429 are also allowed: | |
430 .Sp | |
431 .Vb 12 | |
432 \& X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors | |
433 \& BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms | |
434 \& LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms | |
435 \& SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms | |
436 \& DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms | |
437 \& TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms | |
438 \& H : NumOfImplicitAndExplicitHydrogens | |
439 \& Ar : Aromatic | |
440 \& RA : RingAtom | |
441 \& FC : FormalCharge | |
442 \& MN : MassNumber | |
443 \& SM : SpinMultiplicity | |
444 .Ve | |
445 .Sp | |
446 \&\fIAtomTypes::AtomicInvariantsAtomTypes\fR module is used to assign atomic invariant | |
447 atom types. | |
448 .IP "\fB\-\-BitsOrder\fR \fIAscending | Descending\fR" 4 | |
449 .IX Item "--BitsOrder Ascending | Descending" | |
450 Bits order to use during generation of fingerprints bit-vector string for \fIExtendedConnectivityBits\fR | |
451 value of \fB\-m, \-\-mode\fR option. Possible values: \fIAscending, Descending\fR. Default: \fIAscending\fR. | |
452 .Sp | |
453 \&\fIAscending\fR bit order corresponds to first bit in each byte as the lowest bit as | |
454 opposed to the highest bit. | |
455 .Sp | |
456 Internally, bits are stored in \fIAscending\fR order using Perl vec function. Regardless | |
457 of machine order, big-endian or little-endian, vec function always considers first | |
458 string byte as the lowest byte and first bit within each byte as the lowest bit. | |
459 .IP "\fB\-b, \-\-BitStringFormat\fR \fIBinaryString | HexadecimalString\fR" 4 | |
460 .IX Item "-b, --BitStringFormat BinaryString | HexadecimalString" | |
461 Format of fingerprints bit-vector string data in output \s-1SD\s0, \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) specified by | |
462 \&\fB\-\-output\fR used during \fIExtendedConnectivityBits\fR value of \fB\-m, \-\-mode\fR option. Possible | |
463 values: \fIBinaryString, HexadecimalString\fR. Default value: \fIBinaryString\fR. | |
464 .Sp | |
465 \&\fIBinaryString\fR corresponds to an \s-1ASCII\s0 string containing 1s and 0s. \fIHexadecimalString\fR | |
466 contains bit values in \s-1ASCII\s0 hexadecimal format. | |
467 .Sp | |
468 Examples: | |
469 .Sp | |
470 .Vb 6 | |
471 \& FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp | |
472 \& es:Radius2;1024;BinaryString;Ascending;0000000000000000000000000000100 | |
473 \& 0000000001010000000110000011000000000000100000000000000000000000100001 | |
474 \& 1000000110000000000000000000000000010011000000000000000000000000010000 | |
475 \& 0000000000000000000000000010000000000000000001000000000000000000000000 | |
476 \& 0000000000010000100001000000000000101000000000000000100000000000000... | |
477 \& | |
478 \& FingerprintsBitVector;ExtendedConnectivityBits:FunctionalClassAtomType | |
479 \& s:Radius2;1024;BinaryString;Ascending;00000000000000000000100000000000 | |
480 \& 0000000001000100000000001000000000000000000000000000000000101000000010 | |
481 \& 0000001000000000010000000000000000000000000000000000000000000000000100 | |
482 \& 0000000000001000000000000001000000000001001000000000000000000000000000 | |
483 \& 0000000000000000100000000000001000000000000000000000000000000000000... | |
484 .Ve | |
485 .ie n .IP "\fB\-\-FunctionalClassesToUse\fR \fI""FunctionalClass1,FunctionalClass2...""\fR" 4 | |
486 .el .IP "\fB\-\-FunctionalClassesToUse\fR \fI``FunctionalClass1,FunctionalClass2...''\fR" 4 | |
487 .IX Item "--FunctionalClassesToUse FunctionalClass1,FunctionalClass2..." | |
488 This value is used during \fIFunctionalClassAtomTypes\fR value of \fB\-a, \-\-AtomIdentifierType\fR | |
489 option. It's a list of comma separated valid functional classes. | |
490 .Sp | |
491 Possible values for atom functional classes are: \fIAr, \s-1CA\s0, H, \s-1HBA\s0, \s-1HBD\s0, Hal, \s-1NI\s0, \s-1PI\s0, \s-1RA\s0\fR. | |
492 Default value [ Ref 24 ]: \fI\s-1HBD\s0,HBA,PI,NI,Ar,Hal\fR. | |
493 .Sp | |
494 The functional class abbreviations correspond to: | |
495 .Sp | |
496 .Vb 9 | |
497 \& HBD: HydrogenBondDonor | |
498 \& HBA: HydrogenBondAcceptor | |
499 \& PI : PositivelyIonizable | |
500 \& NI : NegativelyIonizable | |
501 \& Ar : Aromatic | |
502 \& Hal : Halogen | |
503 \& H : Hydrophobic | |
504 \& RA : RingAtom | |
505 \& CA : ChainAtom | |
506 \& | |
507 \& Functional class atom type specification for an atom corresponds to: | |
508 \& | |
509 \& Ar.CA.H.HBA.HBD.Hal.NI.PI.RA | |
510 .Ve | |
511 .Sp | |
512 \&\fIAtomTypes::FunctionalClassAtomTypes\fR module is used to assign functional class atom | |
513 types. It uses following definitions [ Ref 60\-61, Ref 65\-66 ]: | |
514 .Sp | |
515 .Vb 4 | |
516 \& HydrogenBondDonor: NH, NH2, OH | |
517 \& HydrogenBondAcceptor: N[!H], O | |
518 \& PositivelyIonizable: +, NH2 | |
519 \& NegativelyIonizable: \-, C(=O)OH, S(=O)OH, P(=O)OH | |
520 .Ve | |
521 .IP "\fB\-\-CompoundID\fR \fIDataFieldName or LabelPrefixString\fR" 4 | |
522 .IX Item "--CompoundID DataFieldName or LabelPrefixString" | |
523 This value is \fB\-\-CompoundIDMode\fR specific and indicates how compound \s-1ID\s0 is generated. | |
524 .Sp | |
525 For \fIDataField\fR value of \fB\-\-CompoundIDMode\fR option, it corresponds to datafield label name | |
526 whose value is used as compound \s-1ID\s0; otherwise, it's a prefix string used for generating compound | |
527 IDs like LabelPrefixString<Number>. Default value, \fICmpd\fR, generates compound IDs which | |
528 look like Cmpd<Number>. | |
529 .Sp | |
530 Examples for \fIDataField\fR value of \fB\-\-CompoundIDMode\fR: | |
531 .Sp | |
532 .Vb 2 | |
533 \& MolID | |
534 \& ExtReg | |
535 .Ve | |
536 .Sp | |
537 Examples for \fILabelPrefix\fR or \fIMolNameOrLabelPrefix\fR value of \fB\-\-CompoundIDMode\fR: | |
538 .Sp | |
539 .Vb 1 | |
540 \& Compound | |
541 .Ve | |
542 .Sp | |
543 The value specified above generates compound IDs which correspond to Compound<Number> | |
544 instead of default value of Cmpd<Number>. | |
545 .IP "\fB\-\-CompoundIDLabel\fR \fItext\fR" 4 | |
546 .IX Item "--CompoundIDLabel text" | |
547 Specify compound \s-1ID\s0 column label for \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) used during \fICompoundID\fR value | |
548 of \fB\-\-DataFieldsMode\fR option. Default: \fICompoundID\fR. | |
549 .IP "\fB\-\-CompoundIDMode\fR \fIDataField | MolName | LabelPrefix | MolNameOrLabelPrefix\fR" 4 | |
550 .IX Item "--CompoundIDMode DataField | MolName | LabelPrefix | MolNameOrLabelPrefix" | |
551 Specify how to generate compound IDs and write to \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) along with generated | |
552 fingerprints for \fI\s-1FP\s0 | text | all\fR values of \fB\-\-output\fR option: use a \fISDFile(s)\fR datafield value; | |
553 use molname line from \fISDFile(s)\fR; generate a sequential \s-1ID\s0 with specific prefix; use combination | |
554 of both MolName and LabelPrefix with usage of LabelPrefix values for empty molname lines. | |
555 .Sp | |
556 Possible values: \fIDataField | MolName | LabelPrefix | MolNameOrLabelPrefix\fR. | |
557 Default: \fILabelPrefix\fR. | |
558 .Sp | |
559 For \fIMolNameOrLabelPrefix\fR value of \fB\-\-CompoundIDMode\fR, molname line in \fISDFile(s)\fR takes | |
560 precedence over sequential compound IDs generated using \fILabelPrefix\fR and only empty molname | |
561 values are replaced with sequential compound IDs. | |
562 .Sp | |
563 This is only used for \fICompoundID\fR value of \fB\-\-DataFieldsMode\fR option. | |
564 .ie n .IP "\fB\-\-DataFields\fR \fI""FieldLabel1,FieldLabel2,...""\fR" 4 | |
565 .el .IP "\fB\-\-DataFields\fR \fI``FieldLabel1,FieldLabel2,...''\fR" 4 | |
566 .IX Item "--DataFields FieldLabel1,FieldLabel2,..." | |
567 Comma delimited list of \fISDFile(s)\fR data fields to extract and write to \s-1CSV/TSV\s0 text file(s) along | |
568 with generated fingerprints for \fItext | all\fR values of \fB\-\-output\fR option. | |
569 .Sp | |
570 This is only used for \fISpecify\fR value of \fB\-\-DataFieldsMode\fR option. | |
571 .Sp | |
572 Examples: | |
573 .Sp | |
574 .Vb 2 | |
575 \& Extreg | |
576 \& MolID,CompoundName | |
577 .Ve | |
578 .IP "\fB\-d, \-\-DataFieldsMode\fR \fIAll | Common | Specify | CompoundID\fR" 4 | |
579 .IX Item "-d, --DataFieldsMode All | Common | Specify | CompoundID" | |
580 Specify how data fields in \fISDFile(s)\fR are transferred to output \s-1CSV/TSV\s0 text file(s) along | |
581 with generated fingerprints for \fItext | all\fR values of \fB\-\-output\fR option: transfer all \s-1SD\s0 | |
582 data fields; transfer \s-1SD\s0 data fields common to all compounds; extract specified data fields; | |
583 generate a compound \s-1ID\s0 using molname line, a compound prefix, or a combination of both. | |
584 Possible values: \fIAll | Common | Specify | CompoundID\fR. Default value: \fICompoundID\fR. | |
585 .IP "\fB\-f, \-\-Filter\fR \fIYes | No\fR" 4 | |
586 .IX Item "-f, --Filter Yes | No" | |
587 Specify whether to check and filter compound data in SDFile(s). Possible values: \fIYes or No\fR. | |
588 Default value: \fIYes\fR. | |
589 .Sp | |
590 By default, compound data is checked before calculating fingerprints and compounds containing | |
591 atom data corresponding to non-element symbols or no atom data are ignored. | |
592 .IP "\fB\-\-FingerprintsLabel\fR \fItext\fR" 4 | |
593 .IX Item "--FingerprintsLabel text" | |
594 \&\s-1SD\s0 data label or text file column label to use for fingerprints string in output \s-1SD\s0 or | |
595 \&\s-1CSV/TSV\s0 text file(s) specified by \fB\-\-output\fR. Default value: \fIExtendedConnectivityFingerprints\fR. | |
596 .IP "\fB\-h, \-\-help\fR" 4 | |
597 .IX Item "-h, --help" | |
598 Print this help message. | |
599 .IP "\fB\-k, \-\-KeepLargestComponent\fR \fIYes | No\fR" 4 | |
600 .IX Item "-k, --KeepLargestComponent Yes | No" | |
601 Generate fingerprints for only the largest component in molecule. Possible values: | |
602 \&\fIYes or No\fR. Default value: \fIYes\fR. | |
603 .Sp | |
604 For molecules containing multiple connected components, fingerprints can be generated | |
605 in two different ways: use all connected components or just the largest connected | |
606 component. By default, all atoms except for the largest connected component are | |
607 deleted before generation of fingerprints. | |
608 .IP "\fB\-m, \-\-mode\fR \fIExtendedConnectivity | ExtendedConnectivityCount | ExtendedConnectivityBits\fR" 4 | |
609 .IX Item "-m, --mode ExtendedConnectivity | ExtendedConnectivityCount | ExtendedConnectivityBits" | |
610 Specify type of extended connectivity fingerprints to generate for molecules in \fISDFile(s)\fR. | |
611 Possible values: \fIExtendedConnectivity, ExtendedConnectivityCount or | |
612 ExtendedConnectivityBits\fR. Default value: \fIExtendedConnectivity\fR. | |
613 .Sp | |
614 For \fIExtendedConnectivity\fR value of fingerprints \fB\-m, \-\-mode\fR, a fingerprint vector | |
615 containing unique atom identifiers constitutes the extended connectivity fingerprints | |
616 of a molecule. | |
617 .Sp | |
618 For \fIExtendedConnectivityCount\fR value of fingerprints \fB\-m, \-\-mode\fR, a fingerprint vector | |
619 containing unique atom identifiers along with their count constitutes the extended connectivity | |
620 fingerprints of a molecule. | |
621 .Sp | |
622 For \fIExtendedConnectivityBits\fR value of fingerprints \fB\-m, \-\-mode\fR, a fingerprint bit vector | |
623 indicating presence/absence of structurally unique atom identifiers constitutes the extended | |
624 connectivity fingerprints of a molecule. | |
625 .IP "\fB\-n, \-\-NeighborhoodRadius\fR \fInumber\fR" 4 | |
626 .IX Item "-n, --NeighborhoodRadius number" | |
627 Atomic neighborhood radius for generating extended connectivity neighborhoods. Default | |
628 value: \fI2\fR. Valid values: >= 0. Neighborhood radius of zero corresponds to just the list | |
629 of non-hydrogen atoms. | |
630 .Sp | |
631 Default value of \fI2\fR for atomic neighborhood radius generates extended connectivity | |
632 fingerprints corresponding to path length or diameter value of \fI4\fR [ Ref 52b ]. | |
633 .IP "\fB\-\-OutDelim\fR \fIcomma | tab | semicolon\fR" 4 | |
634 .IX Item "--OutDelim comma | tab | semicolon" | |
635 Delimiter for output \s-1CSV/TSV\s0 text file(s). Possible values: \fIcomma, tab, or semicolon\fR. | |
636 Default value: \fIcomma\fR. | |
637 .IP "\fB\-\-output\fR \fI\s-1SD\s0 | \s-1FP\s0 | text | all\fR" 4 | |
638 .IX Item "--output SD | FP | text | all" | |
639 Type of output files to generate. Possible values: \fI\s-1SD\s0, \s-1FP\s0, text, or all\fR. Default value: \fItext\fR. | |
640 .IP "\fB\-o, \-\-overwrite\fR" 4 | |
641 .IX Item "-o, --overwrite" | |
642 Overwrite existing files. | |
643 .IP "\fB\-q, \-\-quote\fR \fIYes | No\fR" 4 | |
644 .IX Item "-q, --quote Yes | No" | |
645 Put quotes around column values in output \s-1CSV/TSV\s0 text file(s). Possible values: | |
646 \&\fIYes or No\fR. Default value: \fIYes\fR. | |
647 .IP "\fB\-r, \-\-root\fR \fIRootName\fR" 4 | |
648 .IX Item "-r, --root RootName" | |
649 New file name is generated using the root: <Root>.<Ext>. Default for new file names: | |
650 <SDFileName><ExtendedConnectivityFP>.<Ext>. The file type determines <Ext> | |
651 value. The sdf, fpf, csv, and tsv <Ext> values are used for \s-1SD\s0, \s-1FP\s0, comma/semicolon, and tab | |
652 delimited text files, respectively. This option is ignored for multiple input files. | |
653 .IP "\fB\-s, \-\-size\fR \fInumber\fR" 4 | |
654 .IX Item "-s, --size number" | |
655 Size of bit-vector to use during generation of fingerprints bit-vector string for | |
656 \&\fIExtendedConnectivityBits\fR value of \fB\-m, \-\-mode\fR. Default value: \fI1024\fR. | |
657 Valid values correspond to any positive integer which satisfies the following criteria: | |
658 power of 2, >= 32 and <= 2 ** 32. | |
659 .Sp | |
660 Examples: | |
661 .Sp | |
662 .Vb 3 | |
663 \& 512 | |
664 \& 1024 | |
665 \& 2048 | |
666 .Ve | |
667 .IP "\fB\-\-UsePerlCoreRandom\fR \fIYes | No\fR" 4 | |
668 .IX Item "--UsePerlCoreRandom Yes | No" | |
669 Specify whether to use Perl CORE::rand or MayaChemTools MathUtil::random function | |
670 during random number generation for setting bits in fingerprints bit-vector strings. Possible | |
671 values: \fIYes or No\fR. Default value: \fIYes\fR. | |
672 .Sp | |
673 \&\fINo\fR value option for \fB\-\-UsePerlCoreRandom\fR allows the generation of fingerprints | |
674 bit-vector strings which are same across different platforms. | |
675 .Sp | |
676 The random number generator implemented in MayaChemTools is a variant of | |
677 linear congruential generator (\s-1LCG\s0) as described by Miller et al. [ Ref 120 ]. | |
678 It is also referred to as Lehmer random number generator or Park-Miller | |
679 random number generator. | |
680 .Sp | |
681 Unlike Perl's core random number generator function rand, the random number | |
682 generator implemented in MayaChemTools, MathUtil::random, generates consistent | |
683 random values across different platforms for a specific random seed and leads | |
684 to generation of portable fingerprints bit-vector strings. | |
685 .IP "\fB\-v, \-\-VectorStringFormat\fR \fIValuesString | IDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString\fR" 4 | |
686 .IX Item "-v, --VectorStringFormat ValuesString | IDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString" | |
687 Format of fingerprints vector string data in output \s-1SD\s0, \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) specified by | |
688 \&\fB\-\-output\fR used during <ExtendedConnectivityCount> value of \fB\-m, \-\-mode\fR option. Possible | |
689 values: \fIValuesString | IDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | | |
690 ValuesAndIDsPairsString\fR. | |
691 .Sp | |
692 Default value during <ExtendedConnectivityCount> value of \fB\-m, \-\-mode\fR option: | |
693 \&\fIIDsAndValuesString\fR. | |
694 .Sp | |
695 Default value during <ExtendedConnectivity> value of \fB\-m, \-\-mode\fR option: \fIValuesString\fR. | |
696 .Sp | |
697 Examples: | |
698 .Sp | |
699 .Vb 6 | |
700 \& FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radi | |
701 \& us2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391 | |
702 \& 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414 | |
703 \& 08799 49532520 64643108 79385615 96062769 273726379 564565671 85514103 | |
704 \& 5 906706094 988546669 1018231313 1032696425 1197507444 1331250018 1338 | |
705 \& 532734 1455473691 1607485225 1609687129 1631614296 1670251330 17303... | |
706 \& | |
707 \& FingerprintsVector;ExtendedConnectivityCount:AtomicInvariantsAtomTypes | |
708 \& :Radius2;60;NumericalValues;IDsAndValuesString;73555770 333564680 3524 | |
709 \& 13391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 | |
710 \& 2141408799 49532520 64643108 79385615 96062769 273726379 564565671...; | |
711 \& 3 2 1 1 14 1 2 10 4 3 1 1 1 1 2 1 2 1 1 1 2 3 1 1 2 1 3 3 8 2 2 2 6 2 | |
712 \& 1 2 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1 | |
713 .Ve | |
714 .IP "\fB\-w, \-\-WorkingDir\fR \fIDirName\fR" 4 | |
715 .IX Item "-w, --WorkingDir DirName" | |
716 Location of working directory. Default: current directory. | |
717 .SH "EXAMPLES" | |
718 .IX Header "EXAMPLES" | |
719 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
720 2 using atomic invariants atom types in vector string format and create a SampleECAIFP.csv | |
721 file containing sequential compound IDs along with fingerprints vector strings data, type: | |
722 .PP | |
723 .Vb 1 | |
724 \& % ExtendedConnectivityFingerprints.pl \-r SampleECAIFP \-o Sample.sdf | |
725 .Ve | |
726 .PP | |
727 To generate extended connectivity count fingerprints corresponding to neighborhood radius up to | |
728 2 using atomic invariants atom types in vector string format and create a SampleECAIFP.csv | |
729 file containing sequential compound IDs along with fingerprints vector strings data, type: | |
730 .PP | |
731 .Vb 2 | |
732 \& % ExtendedConnectivityFingerprints.pl \-m ExtendedConnectivityCount | |
733 \& \-r SampleECAIFP \-o Sample.sdf | |
734 .Ve | |
735 .PP | |
736 To generate extended connectivity bits fingerprints as hexadecimal bit-string corresponding to | |
737 neighborhood radius up to 2 using atomic invariants atom types in vector string format and | |
738 create a SampleECAIFP.csv file containing sequential compound IDs along with fingerprints | |
739 vector strings data, type: | |
740 .PP | |
741 .Vb 2 | |
742 \& % ExtendedConnectivityFingerprints.pl \-m ExtendedConnectivityBits | |
743 \& \-r SampleECAIFP \-o Sample.sdf | |
744 .Ve | |
745 .PP | |
746 To generate extended connectivity bits fingerprints as binary bit-string corresponding to | |
747 neighborhood radius up to 2 using atomic invariants atom types in vector string format and | |
748 create a SampleECAIFP.csv file containing sequential compound IDs along with fingerprints | |
749 vector strings data, type: | |
750 .PP | |
751 .Vb 2 | |
752 \& % ExtendedConnectivityFingerprints.pl \-m ExtendedConnectivityBits | |
753 \& \-\-BitStringFormat BinaryString \-r SampleECAIFP \-o Sample.sdf | |
754 .Ve | |
755 .PP | |
756 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
757 2 using atomic invariants atom types in vector string format and create SampleECAIFP.sdf, SampleECAIFP.fpf | |
758 and SampleECAIFP.csv files containing sequential compound IDs in \s-1CSV\s0 file along with fingerprints | |
759 vector strings data, type: | |
760 .PP | |
761 .Vb 2 | |
762 \& % ExtendedConnectivityFingerprints.pl \-\-output all \-r SampleECAIFP | |
763 \& \-o Sample.sdf | |
764 .Ve | |
765 .PP | |
766 To generate extended connectivity count fingerprints corresponding to neighborhood radius up to | |
767 2 using atomic invariants atom types in vector string format and create SampleECAIFP.sdf, SampleECAIFP.fpf | |
768 and SampleECAIFP.csv files containing sequential compound IDs in \s-1CSV\s0 file along with fingerprints | |
769 vector strings data, type: | |
770 .PP | |
771 .Vb 2 | |
772 \& % ExtendedConnectivityFingerprints.pl \-m ExtendedConnectivityCount | |
773 \& \-\-output all \-r SampleECAIFP \-o Sample.sdf | |
774 .Ve | |
775 .PP | |
776 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
777 2 using functional class atom types in vector string format and create a SampleECFCFP.csv file | |
778 containing sequential compound IDs along with fingerprints vector strings data, type: | |
779 .PP | |
780 .Vb 2 | |
781 \& % ExtendedConnectivityFingerprints.pl \-a FunctionalClassAtomTypes | |
782 \& \-r SampleECFCFP \-o Sample.sdf | |
783 .Ve | |
784 .PP | |
785 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
786 2 using \s-1DREIDING\s0 atom types in vector string format and create a SampleECFP.csv file | |
787 containing sequential compound IDs along with fingerprints vector strings data, type: | |
788 .PP | |
789 .Vb 2 | |
790 \& % ExtendedConnectivityFingerprints.pl \-a DREIDINGAtomTypes | |
791 \& \-r SampleECFP \-o Sample.sdf | |
792 .Ve | |
793 .PP | |
794 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
795 2 using E\-state atom types in vector string format and create a SampleECFP.csv file | |
796 containing sequential compound IDs along with fingerprints vector strings data, type: | |
797 .PP | |
798 .Vb 2 | |
799 \& % ExtendedConnectivityFingerprints.pl \-a EStateAtomTypes | |
800 \& \-r SampleECFP \-o Sample.sdf | |
801 .Ve | |
802 .PP | |
803 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
804 2 using \s-1MMFF94\s0 atom types in vector string format and create a SampleECFP.csv file | |
805 containing sequential compound IDs along with fingerprints vector strings data, type: | |
806 .PP | |
807 .Vb 2 | |
808 \& % ExtendedConnectivityFingerprints.pl \-a MMFF94AtomTypes | |
809 \& \-r SampleECFP \-o Sample.sdf | |
810 .Ve | |
811 .PP | |
812 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
813 2 using SLogP atom types in vector string format and create a SampleECFP.csv file | |
814 containing sequential compound IDs along with fingerprints vector strings data, type: | |
815 .PP | |
816 .Vb 2 | |
817 \& % ExtendedConnectivityFingerprints.pl \-a SLogPAtomTypes | |
818 \& \-r SampleECFP \-o Sample.sdf | |
819 .Ve | |
820 .PP | |
821 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
822 2 using \s-1SYBYL\s0 atom types in vector string format and create a SampleECFP.csv file | |
823 containing sequential compound IDs along with fingerprints vector strings data, type: | |
824 .PP | |
825 .Vb 2 | |
826 \& % ExtendedConnectivityFingerprints.pl \-a SYBYLAtomTypes | |
827 \& \-r SampleECFP \-o Sample.sdf | |
828 .Ve | |
829 .PP | |
830 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
831 2 using \s-1TPSA\s0 atom types in vector string format and create a SampleECFP.csv file | |
832 containing sequential compound IDs along with fingerprints vector strings data, type: | |
833 .PP | |
834 .Vb 2 | |
835 \& % ExtendedConnectivityFingerprints.pl \-a TPSAAtomTypes | |
836 \& \-r SampleECFP \-o Sample.sdf | |
837 .Ve | |
838 .PP | |
839 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
840 2 using \s-1UFF\s0 atom types in vector string format and create a SampleECFP.csv file | |
841 containing sequential compound IDs along with fingerprints vector strings data, type: | |
842 .PP | |
843 .Vb 2 | |
844 \& % ExtendedConnectivityFingerprints.pl \-a UFFAtomTypes | |
845 \& \-r SampleECFP \-o Sample.sdf | |
846 .Ve | |
847 .PP | |
848 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
849 3 using atomic invariants atom types in vector string format and create a SampleECAIFP.csv | |
850 file containing sequential compound IDs along with fingerprints vector strings data, type: | |
851 .PP | |
852 .Vb 2 | |
853 \& % ExtendedConnectivityFingerprints.pl \-a AtomicInvariantsAtomTypes \-n 3 | |
854 \& \-r SampleECAIFP \-o Sample.sdf | |
855 .Ve | |
856 .PP | |
857 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
858 3 using functional class atom types in vector string format and create a SampleECFCFP.csv file | |
859 containing sequential compound IDs along with fingerprints vector strings data, type: | |
860 .PP | |
861 .Vb 2 | |
862 \& % ExtendedConnectivityFingerprints.pl \-a FunctionalClassAtomTypes \-n 3 | |
863 \& \-r SampleECFCFP \-o Sample.sdf | |
864 .Ve | |
865 .PP | |
866 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
867 2 using only \s-1AS\s0,X atomic invariants atom types in vector string format and create a | |
868 SampleECAIFP.csv file containing sequential compound IDs along with fingerprints vector | |
869 strings data, type: | |
870 .PP | |
871 .Vb 2 | |
872 \& % ExtendedConnectivityFingerprints.pl \-a AtomicInvariantsAtomTypes | |
873 \& \-\-AtomicInvariantsToUse "AS,X" \-r SampleECAIFP \-o Sample.sdf | |
874 .Ve | |
875 .PP | |
876 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
877 2 using only \s-1HBD\s0,HBA functional class atom types in vector string format and create a | |
878 SampleECFCFP.csv file containing sequential compound IDs along with fingerprints vector | |
879 strings data, type: | |
880 .PP | |
881 .Vb 2 | |
882 \& % ExtendedConnectivityFingerprints.pl \-a FunctionalClassAtomTypes | |
883 \& \-\-FunctionalClassesToUse "HBD,HBA" \-r SampleECFCFP \-o Sample.sdf | |
884 .Ve | |
885 .PP | |
886 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
887 2 using atomic invariants atom types in vector string format and create a SampleECAIFP.csv | |
888 file containing compound \s-1ID\s0 from molecule name line along with fingerprints vector strings | |
889 data, type: | |
890 .PP | |
891 .Vb 3 | |
892 \& % ExtendedConnectivityFingerprints.pl \-a AtomicInvariantsAtomTypes | |
893 \& \-\-DataFieldsMode CompoundID \-\-CompoundIDMode MolName | |
894 \& \-r SampleECAIFP \-o Sample.sdf | |
895 .Ve | |
896 .PP | |
897 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
898 2 using functional class atom types in vector string format and create a SampleECFCFP.csv | |
899 file containing compound IDs using specified data field along with fingerprints vector strings | |
900 data, type: | |
901 .PP | |
902 .Vb 3 | |
903 \& % ExtendedConnectivityFingerprints.pl \-a FunctionalClassAtomTypes | |
904 \& \-\-DataFieldsMode CompoundID \-\-CompoundIDMode DataField \-\-CompoundID Mol_ID | |
905 \& \-r SampleECFCFP \-o Sample.sdf | |
906 .Ve | |
907 .PP | |
908 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
909 2 using atomic invariants atom types in vector string format and create a SampleECAIFP.csv | |
910 file containing compound \s-1ID\s0 using combination of molecule name line and an explicit compound | |
911 prefix along with fingerprints vector strings data, type: | |
912 .PP | |
913 .Vb 3 | |
914 \& % ExtendedConnectivityFingerprints.pl \-a AtomicInvariantsAtomTypes | |
915 \& \-\-DataFieldsMode CompoundID \-\-CompoundIDMode MolNameOrLabelPrefix | |
916 \& \-\-CompoundID Cmpd \-\-CompoundIDLabel MolID \-r SampleECAIFP \-o Sample.sdf | |
917 .Ve | |
918 .PP | |
919 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
920 2 using functional class atom types in vector string format and create a SampleECFCFP.csv | |
921 file containing specific data fields columns along with fingerprints vector strings | |
922 data, type: | |
923 .PP | |
924 .Vb 3 | |
925 \& % ExtendedConnectivityFingerprints.pl \-a FunctionalClassAtomTypes | |
926 \& \-\-DataFieldsMode Specify \-\-DataFields Mol_ID \-r SampleECFCFP | |
927 \& \-o Sample.sdf | |
928 .Ve | |
929 .PP | |
930 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
931 2 using atomic invariants atom types in vector string format and create a SampleECAIFP.csv | |
932 file containing common data fields columns along with fingerprints vector strings data, type: | |
933 .PP | |
934 .Vb 2 | |
935 \& % ExtendedConnectivityFingerprints.pl \-a AtomicInvariantsAtomTypes | |
936 \& \-\-DataFieldsMode Common \-r SampleECAIFP \-o Sample.sdf | |
937 .Ve | |
938 .PP | |
939 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
940 2 using functional class atom types in vector string format and create SampleECFCFP.sdf, SampleECFCFP.fpf | |
941 and SampleECFCFP.csv files containing all data fields columns in \s-1CSV\s0 file along with fingerprints | |
942 vector strings data, type: | |
943 .PP | |
944 .Vb 3 | |
945 \& % ExtendedConnectivityFingerprints.pl \-a FunctionalClassAtomTypes | |
946 \& \-\-DataFieldsMode All \-\-output all \-r SampleECFCFP | |
947 \& \-o Sample.sdf | |
948 .Ve | |
949 .SH "AUTHOR" | |
950 .IX Header "AUTHOR" | |
951 Manish Sud <msud@san.rr.com> | |
952 .SH "SEE ALSO" | |
953 .IX Header "SEE ALSO" | |
954 InfoFingerprintsFiles.pl, SimilarityMatricesFingerprints.pl, AtomNeighborhoodsFingerprints.pl, | |
955 MACCSKeysFingerprints.pl, PathLengthFingerprints.pl, | |
956 TopologicalAtomPairsFingerprints.pl, TopologicalAtomTorsionsFingerprints.pl, | |
957 TopologicalPharmacophoreAtomPairsFingerprints.pl, TopologicalPharmacophoreAtomTripletsFingerprints.pl | |
958 .SH "COPYRIGHT" | |
959 .IX Header "COPYRIGHT" | |
960 Copyright (C) 2015 Manish Sud. All rights reserved. | |
961 .PP | |
962 This file is part of MayaChemTools. | |
963 .PP | |
964 MayaChemTools is free software; you can redistribute it and/or modify it under | |
965 the terms of the \s-1GNU\s0 Lesser General Public License as published by the Free | |
966 Software Foundation; either version 3 of the License, or (at your option) | |
967 any later version. |