comparison docs/scripts/man1/SimilarityMatricesFingerprints.1 @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 .\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.22)
2 .\"
3 .\" Standard preamble:
4 .\" ========================================================================
5 .de Sp \" Vertical space (when we can't use .PP)
6 .if t .sp .5v
7 .if n .sp
8 ..
9 .de Vb \" Begin verbatim text
10 .ft CW
11 .nf
12 .ne \\$1
13 ..
14 .de Ve \" End verbatim text
15 .ft R
16 .fi
17 ..
18 .\" Set up some character translations and predefined strings. \*(-- will
19 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
20 .\" double quote, and \*(R" will give a right double quote. \*(C+ will
21 .\" give a nicer C++. Capital omega is used to do unbreakable dashes and
22 .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
23 .\" nothing in troff, for use with C<>.
24 .tr \(*W-
25 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
26 .ie n \{\
27 . ds -- \(*W-
28 . ds PI pi
29 . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
30 . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
31 . ds L" ""
32 . ds R" ""
33 . ds C` ""
34 . ds C' ""
35 'br\}
36 .el\{\
37 . ds -- \|\(em\|
38 . ds PI \(*p
39 . ds L" ``
40 . ds R" ''
41 'br\}
42 .\"
43 .\" Escape single quotes in literal strings from groff's Unicode transform.
44 .ie \n(.g .ds Aq \(aq
45 .el .ds Aq '
46 .\"
47 .\" If the F register is turned on, we'll generate index entries on stderr for
48 .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
49 .\" entries marked with X<> in POD. Of course, you'll have to process the
50 .\" output yourself in some meaningful fashion.
51 .ie \nF \{\
52 . de IX
53 . tm Index:\\$1\t\\n%\t"\\$2"
54 ..
55 . nr % 0
56 . rr F
57 .\}
58 .el \{\
59 . de IX
60 ..
61 .\}
62 .\"
63 .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
64 .\" Fear. Run. Save yourself. No user-serviceable parts.
65 . \" fudge factors for nroff and troff
66 .if n \{\
67 . ds #H 0
68 . ds #V .8m
69 . ds #F .3m
70 . ds #[ \f1
71 . ds #] \fP
72 .\}
73 .if t \{\
74 . ds #H ((1u-(\\\\n(.fu%2u))*.13m)
75 . ds #V .6m
76 . ds #F 0
77 . ds #[ \&
78 . ds #] \&
79 .\}
80 . \" simple accents for nroff and troff
81 .if n \{\
82 . ds ' \&
83 . ds ` \&
84 . ds ^ \&
85 . ds , \&
86 . ds ~ ~
87 . ds /
88 .\}
89 .if t \{\
90 . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
91 . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
92 . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
93 . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
94 . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
95 . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
96 .\}
97 . \" troff and (daisy-wheel) nroff accents
98 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
99 .ds 8 \h'\*(#H'\(*b\h'-\*(#H'
100 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
101 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
102 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
103 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
104 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
105 .ds ae a\h'-(\w'a'u*4/10)'e
106 .ds Ae A\h'-(\w'A'u*4/10)'E
107 . \" corrections for vroff
108 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
109 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
110 . \" for low resolution devices (crt and lpr)
111 .if \n(.H>23 .if \n(.V>19 \
112 \{\
113 . ds : e
114 . ds 8 ss
115 . ds o a
116 . ds d- d\h'-1'\(ga
117 . ds D- D\h'-1'\(hy
118 . ds th \o'bp'
119 . ds Th \o'LP'
120 . ds ae ae
121 . ds Ae AE
122 .\}
123 .rm #[ #] #H #V #F C
124 .\" ========================================================================
125 .\"
126 .IX Title "SIMILARITYMATRICESFINGERPRINTS 1"
127 .TH SIMILARITYMATRICESFINGERPRINTS 1 "2015-03-29" "perl v5.14.2" "MayaChemTools"
128 .\" For nroff, turn off justification. Always turn off hyphenation; it makes
129 .\" way too many mistakes in technical documents.
130 .if n .ad l
131 .nh
132 .SH "NAME"
133 SimilarityMatricesFingerprints.pl \- Calculate similarity matrices using fingerprints strings data in SD, FP and CSV/TSV text file(s)
134 .SH "SYNOPSIS"
135 .IX Header "SYNOPSIS"
136 SimilarityMatricesFingerprints.pl SDFile(s) FPFile(s) TextFile(s)...
137 .PP
138 SimilarityMatricesFingerprints.pl [\fB\-\-alpha\fR \fInumber\fR] [\fB\-\-beta\fR \fInumber\fR]
139 [\fB\-b, \-\-BitVectorComparisonMode\fR \fIAll | \*(L"TanimotoSimilarity,[ TverskySimilarity, ... ]\*(R"\fR]
140 [\fB\-c, \-\-ColMode\fR \fIColNum | ColLabel\fR] [\fB\-\-CompoundIDCol\fR \fIcol number | col name\fR]
141 [\fB\-\-CompoundIDPrefix\fR \fItext\fR] [\fB\-\-CompoundIDField\fR \fIDataFieldName\fR]
142 [\fB\-\-CompoundIDMode\fR \fIDataField | MolName | LabelPrefix | MolNameOrLabelPrefix\fR]
143 [\fB\-d, \-\-detail\fR \fIInfoLevel\fR] [\fB\-f, \-\-fast\fR] [\fB\-\-FingerprintsCol\fR \fIcol number | col name\fR]
144 [\fB\-\-FingerprintsField\fR \fIFieldLabel\fR] [\fB\-h, \-\-help\fR] [\fB\-\-InDelim\fR \fIcomma | semicolon\fR]
145 [\fB\-\-InputDataMode\fR \fILoadInMemory | ScanFile\fR]
146 [\fB\-m, \-\-mode\fR \fIAutoDetect | FingerprintsBitVectorString | FingerprintsVectorString\fR]
147 [\fB\-\-OutDelim\fR \fIcomma | tab | semicolon\fR] [\fB\-\-OutMatrixFormat\fR \fIRowsAndColumns | IDPairsAndValue\fR]
148 [\fB\-\-OutMatrixType\fR \fIFullMatrix | UpperTriangularMatrix | LowerTriangularMatrix\fR]
149 [\fB\-o, \-\-overwrite\fR] [\fB\-p, \-\-precision\fR \fInumber\fR]
150 [\fB\-q, \-\-quote\fR \fIYes | No\fR] [\fB\-r, \-\-root\fR \fIRootName\fR]
151 [\fB\-v, \-\-VectorComparisonMode\fR \fIAll | \*(L"TanimotoSimilarity, [ ManhattanDistance, ...]\*(R"\fR]
152 [\fB\-\-VectorComparisonFormulism\fR \fIAll | \*(L"AlgebraicForm, [BinaryForm, SetTheoreticForm]\*(R"\fR]
153 [\fB\-w, \-\-WorkingDir\fR dirname] SDFile(s) FPFile(s) TextFile(s)...
154 .SH "DESCRIPTION"
155 .IX Header "DESCRIPTION"
156 Calculate similarity matrices using fingerprint bit-vector or vector strings data in \fI\s-1SD\s0, \s-1FP\s0
157 and \s-1CSV/TSV\s0\fR text file(s) and generate \s-1CSV/TSV\s0 text file(s) containing values for specified
158 similarity and distance coefficients.
159 .PP
160 The scripts SimilarityMatrixSDFiles.pl and SimilarityMatrixTextFiles.pl have been removed from the
161 current release of MayaChemTools and their functionality merged with this script.
162 .PP
163 The valid \fISDFile\fR extensions are \fI.sdf\fR and \fI.sd\fR. All \s-1SD\s0 files in a current directory
164 can be specified either by \fI*.sdf\fR or the current directory name.
165 .PP
166 The valid \fIFPFile\fR extensions are \fI.fpf\fR and \fI.fp\fR. All \s-1FP\s0 files in a current directory
167 can be specified either by \fI*.fpf\fR or the current directory name.
168 .PP
169 The valid \fITextFile\fR extensions are \fI.csv\fR and \fI.tsv\fR for comma/semicolon and tab
170 delimited text files respectively. All other file names are ignored. All text files in a
171 current directory can be specified by \fI*.csv\fR, \fI*.tsv\fR, or the current directory
172 name. The \fB\-\-indelim\fR option determines the format of \fITextFile(s)\fR. Any file
173 which doesn't correspond to the format indicated by \fB\-\-indelim\fR option is ignored.
174 .PP
175 Example of \fI\s-1FP\s0\fR file containing fingerprints bit-vector string data:
176 .PP
177 .Vb 10
178 \& #
179 \& # Package = MayaChemTools 7.4
180 \& # ReleaseDate = Oct 21, 2010
181 \& #
182 \& # TimeStamp = Mon Mar 7 15:14:01 2011
183 \& #
184 \& # FingerprintsStringType = FingerprintsBitVector
185 \& #
186 \& # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:...
187 \& # Size = 1024
188 \& # BitStringFormat = HexadecimalString
189 \& # BitsOrder = Ascending
190 \& #
191 \& Cmpd1 9c8460989ec8a49913991a6603130b0a19e8051c89184414953800cc21510...
192 \& Cmpd2 000000249400840040100042011001001980410c000000001010088001120...
193 \& ... ...
194 \& ... ..
195 .Ve
196 .PP
197 Example of \fI\s-1FP\s0\fR file containing fingerprints vector string data:
198 .PP
199 .Vb 10
200 \& #
201 \& # Package = MayaChemTools 7.4
202 \& # ReleaseDate = Oct 21, 2010
203 \& #
204 \& # TimeStamp = Mon Mar 7 15:14:01 2011
205 \& #
206 \& # FingerprintsStringType = FingerprintsVector
207 \& #
208 \& # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:...
209 \& # VectorStringFormat = IDsAndValuesString
210 \& # VectorValuesType = NumericalValues
211 \& #
212 \& Cmpd1 338;C F N O C:C C:N C=O CC CF CN CO C:C:C C:C:N C:CC C:CF C:CN C:
213 \& N:C C:NC CC:N CC=O CCC CCN CCO CNC NC=O O=CO C:C:C:C C:C:C:N C:C:CC...;
214 \& 33 1 2 5 21 2 2 12 1 3 3 20 2 10 2 2 1 2 2 2 8 2 5 1 1 1 19 2 8 2 2 2 2
215 \& 6 2 2 2 2 2 2 2 2 3 2 2 1 4 1 5 1 1 18 6 2 2 1 2 10 2 1 2 1 2 2 2 2 ...
216 \& Cmpd2 103;C N O C=N C=O CC CN CO CC=O CCC CCN CCO CNC N=CN NC=O NCN O=C
217 \& O C CC=O CCCC CCCN CCCO CCNC CNC=N CNC=O CNCN CCCC=O CCCCC CCCCN CC...;
218 \& 15 4 4 1 2 13 5 2 2 15 5 3 2 2 1 1 1 2 17 7 6 5 1 1 1 2 15 8 5 7 2 2 2 2
219 \& 1 2 1 1 3 15 7 6 8 3 4 4 3 2 2 1 2 3 14 2 4 7 4 4 4 4 1 1 1 2 1 1 1 ...
220 \& ... ...
221 \& ... ...
222 .Ve
223 .PP
224 Example of \fI\s-1SD\s0\fR file containing fingerprints bit-vector string data:
225 .PP
226 .Vb 10
227 \& ... ...
228 \& ... ...
229 \& $$$$
230 \& ... ...
231 \& ... ...
232 \& ... ...
233 \& 41 44 0 0 0 0 0 0 0 0999 V2000
234 \& \-3.3652 1.4499 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
235 \& ... ...
236 \& 2 3 1 0 0 0 0
237 \& ... ...
238 \& M END
239 \& > <CmpdID>
240 \& Cmpd1
241 \&
242 \& > <PathLengthFingerprints>
243 \& FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLengt
244 \& h1:MaxLength8;1024;HexadecimalString;Ascending;9c8460989ec8a49913991a66
245 \& 03130b0a19e8051c89184414953800cc2151082844a201042800130860308e8204d4028
246 \& 00831048940e44281c00060449a5000ac80c894114e006321264401600846c050164462
247 \& 08190410805000304a10205b0100e04c0038ba0fad0209c0ca8b1200012268b61c0026a
248 \& aa0660a11014a011d46
249 \&
250 \& $$$$
251 \& ... ...
252 \& ... ...
253 .Ve
254 .PP
255 Example of \s-1CSV\s0 \fIText\fR file containing fingerprints bit-vector string data:
256 .PP
257 .Vb 7
258 \& "CompoundID","PathLengthFingerprints"
259 \& "Cmpd1","FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes
260 \& :MinLength1:MaxLength8;1024;HexadecimalString;Ascending;9c8460989ec8a4
261 \& 9913991a6603130b0a19e8051c89184414953800cc2151082844a20104280013086030
262 \& 8e8204d402800831048940e44281c00060449a5000ac80c894114e006321264401..."
263 \& ... ...
264 \& ... ...
265 .Ve
266 .PP
267 The current release of MayaChemTools supports the following types of fingerprint
268 bit-vector and vector strings:
269 .PP
270 .Vb 6
271 \& FingerprintsVector;AtomNeighborhoods:AtomicInvariantsAtomTypes:MinRadi
272 \& us0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0\-C.X1.BO1.H3\-AT
273 \& C1:NR1\-C.X3.BO3.H1\-ATC1:NR2\-C.X1.BO1.H3\-ATC1:NR2\-C.X3.BO4\-ATC1 NR0\-C.X
274 \& 1.BO1.H3\-ATC1:NR1\-C.X3.BO3.H1\-ATC1:NR2\-C.X1.BO1.H3\-ATC1:NR2\-C.X3.BO4\-A
275 \& TC1 NR0\-C.X2.BO2.H2\-ATC1:NR1\-C.X2.BO2.H2\-ATC1:NR1\-C.X3.BO3.H1\-ATC1:NR2
276 \& \-C.X2.BO2.H2\-ATC1:NR2\-N.X3.BO3\-ATC1:NR2\-O.X1.BO1.H1\-ATC1 NR0\-C.X2.B...
277 \&
278 \& FingerprintsVector;AtomTypesCount:AtomicInvariantsAtomTypes:ArbitraryS
279 \& ize;10;NumericalValues;IDsAndValuesString;C.X1.BO1.H3 C.X2.BO2.H2 C.X2
280 \& .BO3.H1 C.X3.BO3.H1 C.X3.BO4 F.X1.BO1 N.X2.BO2.H1 N.X3.BO3 O.X1.BO1.H1
281 \& O.X1.BO2;2 4 14 3 10 1 1 1 3 2
282 \&
283 \& FingerprintsVector;AtomTypesCount:SLogPAtomTypes:ArbitrarySize;16;Nume
284 \& ricalValues;IDsAndValuesString;C1 C10 C11 C14 C18 C20 C21 C22 C5 CS F
285 \& N11 N4 O10 O2 O9;5 1 1 1 14 4 2 1 2 2 1 1 1 1 3 1
286 \&
287 \& FingerprintsVector;AtomTypesCount:SLogPAtomTypes:FixedSize;67;OrderedN
288 \& umericalValues;IDsAndValuesString;C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C
289 \& 12 C13 C14 C15 C16 C17 C18 C19 C20 C21 C22 C23 C24 C25 C26 C27 CS N1 N
290 \& 2 N3 N4 N5 N6 N7 N8 N9 N10 N11 N12 N13 N14 NS O1 O2 O3 O4 O5 O6 O7 O8
291 \& O9 O10 O11 O12 OS F Cl Br I Hal P S1 S2 S3 Me1 Me2;5 0 0 0 2 0 0 0 0 1
292 \& 1 0 0 1 0 0 0 14 0 4 2 1 0 0 0 0 0 2 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0...
293 \&
294 \& FingerprintsVector;EStateIndicies:ArbitrarySize;11;NumericalValues;IDs
295 \& AndValuesString;SaaCH SaasC SaasN SdO SdssC SsCH3 SsF SsOH SssCH2 SssN
296 \& H SsssCH;24.778 4.387 1.993 25.023 \-1.435 3.975 14.006 29.759 \-0.073 3
297 \& .024 \-2.270
298 \&
299 \& FingerprintsVector;EStateIndicies:FixedSize;87;OrderedNumericalValues;
300 \& ValuesString;0 0 0 0 0 0 0 3.975 0 \-0.073 0 0 24.778 \-2.270 0 0 \-1.435
301 \& 4.387 0 0 0 0 0 0 3.024 0 0 0 0 0 0 0 1.993 0 29.759 25.023 0 0 0 0 1
302 \& 4.006 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
303 \& 0 0 0 0 0 0 0 0 0 0 0 0 0 0
304 \&
305 \& FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radi
306 \& us2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391
307 \& 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414
308 \& 08799 49532520 64643108 79385615 96062769 273726379 564565671 85514103
309 \& 5 906706094 988546669 1018231313 1032696425 1197507444 1331250018 1338
310 \& 532734 1455473691 1607485225 1609687129 1631614296 1670251330 17303...
311 \&
312 \& FingerprintsVector;ExtendedConnectivityCount:AtomicInvariantsAtomTypes
313 \& :Radius2;60;NumericalValues;IDsAndValuesString;73555770 333564680 3524
314 \& 13391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649
315 \& 2141408799 49532520 64643108 79385615 96062769 273726379 564565671...;
316 \& 3 2 1 1 14 1 2 10 4 3 1 1 1 1 2 1 2 1 1 1 2 3 1 1 2 1 3 3 8 2 2 2 6 2
317 \& 1 2 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1
318 \&
319 \& FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp
320 \& es:Radius2;1024;BinaryString;Ascending;0000000000000000000000000000100
321 \& 0000000001010000000110000011000000000000100000000000000000000000100001
322 \& 1000000110000000000000000000000000010011000000000000000000000000010000
323 \& 0000000000000000000000000010000000000000000001000000000000000000000000
324 \& 0000000000010000100001000000000000101000000000000000100000000000000...
325 \&
326 \& FingerprintsVector;ExtendedConnectivity:FunctionalClassAtomTypes:Radiu
327 \& s2;57;AlphaNumericalValues;ValuesString;24769214 508787397 850393286 8
328 \& 62102353 981185303 1231636850 1649386610 1941540674 263599683 32920567
329 \& 1 571109041 639579325 683993318 723853089 810600886 885767127 90326012
330 \& 7 958841485 981022393 1126908698 1152248391 1317567065 1421489994 1455
331 \& 632544 1557272891 1826413669 1983319256 2015750777 2029559552 20404...
332 \&
333 \& FingerprintsVector;ExtendedConnectivity:EStateAtomTypes:Radius2;62;Alp
334 \& haNumericalValues;ValuesString;25189973 528584866 662581668 671034184
335 \& 926543080 1347067490 1738510057 1759600920 2034425745 2097234755 21450
336 \& 44754 96779665 180364292 341712110 345278822 386540408 387387308 50430
337 \& 1706 617094135 771528807 957666640 997798220 1158349170 1291258082 134
338 \& 1138533 1395329837 1420277211 1479584608 1486476397 1487556246 1566...
339 \&
340 \& FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000
341 \& 0000000000000000000000000000000001001000010010000000010010000000011100
342 \& 0100101010111100011011000100110110000011011110100110111111111111011111
343 \& 11111111111110111000
344 \&
345 \& FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011
346 \& 1110011111100101111111000111101100110000000000000011100010000000000000
347 \& 0000000000000000000000000000000000000000000000101000000000000000000000
348 \& 0000000000000000000000000000000000000000000000000000000000000000000000
349 \& 0000000000000000000000000000000000000011000000000000000000000000000000
350 \& 0000000000000000000000000000000000000000
351 \&
352 \& FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri
353 \& ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
354 \& 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0
355 \& 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0
356 \& 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1
357 \& 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1
358 \&
359 \& FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri
360 \& ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0
361 \& 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0
362 \& 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
363 \& 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0
364 \& 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
365 \&
366 \& FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng
367 \& th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110
368 \& 0100010101011000101001011100110001000010001001101000001001001001001000
369 \& 0010110100000111001001000001001010100100100000000011000000101001011100
370 \& 0010000001000101010100000100111100110111011011011000000010110111001101
371 \& 0101100011000000010001000011000010100011101100001000001000100000000...
372 \&
373 \& FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength
374 \& 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2
375 \& C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X
376 \& 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1
377 \& 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO
378 \& 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C....
379 \&
380 \& FingerprintsVector;PathLengthCount:MMFF94AtomTypes:MinLength1:MaxLengt
381 \& h8;463;NumericalValues;IDsAndValuesPairsString;C5A 2 C5B 2 C=ON 1 CB 1
382 \& 8 COO 1 CR 9 F 1 N5 1 NC=O 1 O=CN 1 O=CO 1 OC=O 1 OR 2 C5A:C5B 2 C5A:N
383 \& 5 2 C5ACB 1 C5ACR 1 C5B:C5B 1 C5BC=ON 1 C5BCB 1 C=ON=O=CN 1 C=ONNC=O 1
384 \& CB:CB 18 CBF 1 CBNC=O 1 COO=O=CO 1 COOCR 1 COOOC=O 1 CRCR 7 CRN5 1 CR
385 \& OR 2 C5A:C5B:C5B 2 C5A:C5BC=ON 1 C5A:C5BCB 1 C5A:N5:C5A 1 C5A:N5CR ...
386 \&
387 \& FingerprintsVector;TopologicalAtomPairs:AtomicInvariantsAtomTypes:MinD
388 \& istance1:MaxDistance10;223;NumericalValues;IDsAndValuesString;C.X1.BO1
389 \& .H3\-D1\-C.X3.BO3.H1 C.X2.BO2.H2\-D1\-C.X2.BO2.H2 C.X2.BO2.H2\-D1\-C.X3.BO3.
390 \& H1 C.X2.BO2.H2\-D1\-C.X3.BO4 C.X2.BO2.H2\-D1\-N.X3.BO3 C.X2.BO3.H1\-D1\-...;
391 \& 2 1 4 1 1 10 8 1 2 6 1 2 2 1 2 1 2 2 1 2 1 5 1 10 12 2 2 1 2 1 9 1 3 1
392 \& 1 1 2 2 1 3 6 1 6 14 2 2 2 3 1 3 1 8 2 2 1 3 2 6 1 2 2 5 1 3 1 23 1...
393 \&
394 \& FingerprintsVector;TopologicalAtomPairs:FunctionalClassAtomTypes:MinDi
395 \& stance1:MaxDistance10;144;NumericalValues;IDsAndValuesString;Ar\-D1\-Ar
396 \& Ar\-D1\-Ar.HBA Ar\-D1\-HBD Ar\-D1\-Hal Ar\-D1\-None Ar.HBA\-D1\-None HBA\-D1\-NI H
397 \& BA\-D1\-None HBA.HBD\-D1\-NI HBA.HBD\-D1\-None HBD\-D1\-None NI\-D1\-None No...;
398 \& 23 2 1 1 2 1 1 1 1 2 1 1 7 28 3 1 3 2 8 2 1 1 1 5 1 5 24 3 3 4 2 13 4
399 \& 1 1 4 1 5 22 4 4 3 1 19 1 1 1 1 1 2 2 3 1 1 8 25 4 5 2 3 1 26 1 4 1 ...
400 \&
401 \& FingerprintsVector;TopologicalAtomTorsions:AtomicInvariantsAtomTypes;3
402 \& 3;NumericalValues;IDsAndValuesString;C.X1.BO1.H3\-C.X3.BO3.H1\-C.X3.BO4\-
403 \& C.X3.BO4 C.X1.BO1.H3\-C.X3.BO3.H1\-C.X3.BO4\-N.X3.BO3 C.X2.BO2.H2\-C.X2.BO
404 \& 2.H2\-C.X3.BO3.H1\-C.X2.BO2.H2 C.X2.BO2.H2\-C.X2.BO2.H2\-C.X3.BO3.H1\-O...;
405 \& 2 2 1 1 2 2 1 1 3 4 4 8 4 2 2 6 2 2 1 2 1 1 2 1 1 2 6 2 4 2 1 3 1
406 \&
407 \& FingerprintsVector;TopologicalAtomTorsions:EStateAtomTypes;36;Numerica
408 \& lValues;IDsAndValuesString;aaCH\-aaCH\-aaCH\-aaCH aaCH\-aaCH\-aaCH\-aasC aaC
409 \& H\-aaCH\-aasC\-aaCH aaCH\-aaCH\-aasC\-aasC aaCH\-aaCH\-aasC\-sF aaCH\-aaCH\-aasC\-
410 \& ssNH aaCH\-aasC\-aasC\-aasC aaCH\-aasC\-aasC\-aasN aaCH\-aasC\-ssNH\-dssC a...;
411 \& 4 4 8 4 2 2 6 2 2 2 4 3 2 1 3 3 2 2 2 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 2
412 \&
413 \& FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M
414 \& inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesString;C.X1
415 \& .BO1.H3\-D1\-C.X1.BO1.H3\-D1\-C.X3.BO3.H1\-D2 C.X1.BO1.H3\-D1\-C.X2.BO2.H2\-D1
416 \& 0\-C.X3.BO4\-D9 C.X1.BO1.H3\-D1\-C.X2.BO2.H2\-D3\-N.X3.BO3\-D4 C.X1.BO1.H3\-D1
417 \& \-C.X2.BO2.H2\-D4\-C.X2.BO2.H2\-D5 C.X1.BO1.H3\-D1\-C.X2.BO2.H2\-D6\-C.X3....;
418 \& 1 2 2 2 2 2 2 2 8 8 4 8 4 4 2 2 2 2 4 2 2 2 4 2 2 2 2 1 2 2 4 4 4 2 2
419 \& 2 4 4 4 8 4 4 2 4 4 4 2 4 4 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 8...
420 \&
421 \& FingerprintsVector;TopologicalAtomTriplets:SYBYLAtomTypes:MinDistance1
422 \& :MaxDistance10;2332;NumericalValues;IDsAndValuesString;C.2\-D1\-C.2\-D9\-C
423 \& .3\-D10 C.2\-D1\-C.2\-D9\-C.ar\-D10 C.2\-D1\-C.3\-D1\-C.3\-D2 C.2\-D1\-C.3\-D10\-C.3\-
424 \& D9 C.2\-D1\-C.3\-D2\-C.3\-D3 C.2\-D1\-C.3\-D2\-C.ar\-D3 C.2\-D1\-C.3\-D3\-C.3\-D4 C.2
425 \& \-D1\-C.3\-D3\-N.ar\-D4 C.2\-D1\-C.3\-D3\-O.3\-D2 C.2\-D1\-C.3\-D4\-C.3\-D5 C.2\-D1\-C.
426 \& 3\-D5\-C.3\-D6 C.2\-D1\-C.3\-D5\-O.3\-D4 C.2\-D1\-C.3\-D6\-C.3\-D7 C.2\-D1\-C.3\-D7...
427 \&
428 \& FingerprintsVector;TopologicalPharmacophoreAtomPairs:ArbitrarySize:Min
429 \& Distance1:MaxDistance10;54;NumericalValues;IDsAndValuesString;H\-D1\-H H
430 \& \-D1\-NI HBA\-D1\-NI HBD\-D1\-NI H\-D2\-H H\-D2\-HBA H\-D2\-HBD HBA\-D2\-HBA HBA\-D2\-
431 \& HBD H\-D3\-H H\-D3\-HBA H\-D3\-HBD H\-D3\-NI HBA\-D3\-NI HBD\-D3\-NI H\-D4\-H H\-D4\-H
432 \& BA H\-D4\-HBD HBA\-D4\-HBA HBA\-D4\-HBD HBD\-D4\-HBD H\-D5\-H H\-D5\-HBA H\-D5\-...;
433 \& 18 1 2 1 22 12 8 1 2 18 6 3 1 1 1 22 13 6 5 7 2 28 9 5 1 1 1 36 16 10
434 \& 3 4 1 37 10 8 1 35 10 9 3 3 1 28 7 7 4 18 16 12 5 1 2 1
435 \&
436 \& FingerprintsVector;TopologicalPharmacophoreAtomPairs:FixedSize:MinDist
437 \& ance1:MaxDistance10;150;OrderedNumericalValues;ValuesString;18 0 0 1 0
438 \& 0 0 2 0 0 1 0 0 0 0 22 12 8 0 0 1 2 0 0 0 0 0 0 0 0 18 6 3 1 0 0 0 1
439 \& 0 0 1 0 0 0 0 22 13 6 0 0 5 7 0 0 2 0 0 0 0 0 28 9 5 1 0 0 0 1 0 0 1 0
440 \& 0 0 0 36 16 10 0 0 3 4 0 0 1 0 0 0 0 0 37 10 8 0 0 0 0 1 0 0 0 0 0 0
441 \& 0 35 10 9 0 0 3 3 0 0 1 0 0 0 0 0 28 7 7 4 0 0 0 0 0 0 0 0 0 0 0 18...
442 \&
443 \& FingerprintsVector;TopologicalPharmacophoreAtomTriplets:ArbitrarySize:
444 \& MinDistance1:MaxDistance10;696;NumericalValues;IDsAndValuesString;Ar1\-
445 \& Ar1\-Ar1 Ar1\-Ar1\-H1 Ar1\-Ar1\-HBA1 Ar1\-Ar1\-HBD1 Ar1\-H1\-H1 Ar1\-H1\-HBA1 Ar1
446 \& \-H1\-HBD1 Ar1\-HBA1\-HBD1 H1\-H1\-H1 H1\-H1\-HBA1 H1\-H1\-HBD1 H1\-HBA1\-HBA1 H1\-
447 \& HBA1\-HBD1 H1\-HBA1\-NI1 H1\-HBD1\-NI1 HBA1\-HBA1\-NI1 HBA1\-HBD1\-NI1 Ar1\-...;
448 \& 46 106 8 3 83 11 4 1 21 5 3 1 2 2 1 1 1 100 101 18 11 145 132 26 14 23
449 \& 28 3 3 5 4 61 45 10 4 16 20 7 5 1 3 4 5 3 1 1 1 1 5 4 2 1 2 2 2 1 1 1
450 \& 119 123 24 15 185 202 41 25 22 17 3 5 85 95 18 11 23 17 3 1 1 6 4 ...
451 \&
452 \& FingerprintsVector;TopologicalPharmacophoreAtomTriplets:FixedSize:MinD
453 \& istance1:MaxDistance10;2692;OrderedNumericalValues;ValuesString;46 106
454 \& 8 3 0 0 83 11 4 0 0 0 1 0 0 0 0 0 0 0 0 21 5 3 0 0 1 2 2 0 0 1 0 0 0
455 \& 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 100 101 18 11 0 0 145 132 26
456 \& 14 0 0 23 28 3 3 0 0 5 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 45 10 4 0
457 \& 0 16 20 7 5 1 0 3 4 5 3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 5 ...
458 .Ve
459 .SH "OPTIONS"
460 .IX Header "OPTIONS"
461 .IP "\fB\-\-alpha\fR \fInumber\fR" 4
462 .IX Item "--alpha number"
463 Value of alpha parameter for calculating \fITversky\fR similarity coefficient specified for
464 \&\fB\-b, \-\-BitVectorComparisonMode\fR option. It corresponds to weights assigned for bits set
465 to \*(L"1\*(R" in a pair of fingerprint bit-vectors during the calculation of similarity coefficient. Possible
466 values: \fI0 to 1\fR. Default value: <0.5>.
467 .IP "\fB\-\-beta\fR \fInumber\fR" 4
468 .IX Item "--beta number"
469 Value of beta parameter for calculating \fIWeightedTanimoto\fR and \fIWeightedTversky\fR
470 similarity coefficients specified for \fB\-b, \-\-BitVectorComparisonMode\fR option. It is used to
471 weight the contributions of bits set to \*(L"0\*(R" during the calculation of similarity coefficients. Possible
472 values: \fI0 to 1\fR. Default value of <1> makes \fIWeightedTanimoto\fR and \fIWeightedTversky\fR
473 equivalent to \fITanimoto\fR and \fITversky\fR.
474 .ie n .IP "\fB\-b, \-\-BitVectorComparisonMode\fR \fIAll | ""TanimotoSimilarity,[TverskySimilarity,...]""\fR" 4
475 .el .IP "\fB\-b, \-\-BitVectorComparisonMode\fR \fIAll | ``TanimotoSimilarity,[TverskySimilarity,...]''\fR" 4
476 .IX Item "-b, --BitVectorComparisonMode All | TanimotoSimilarity,[TverskySimilarity,...]"
477 Specify what similarity coefficients to use for calculating similarity matrices for fingerprints bit-vector
478 strings data values in \fITextFile(s)\fR: calculate similarity matrices for all supported similarity
479 coefficients or specify a comma delimited list of similarity coefficients. Possible values:
480 \&\fIAll | "TanimotoSimilarity,[TverskySimilarity,...]"\fR. Default: \fITanimotoSimilarity\fR.
481 .Sp
482 \&\fIAll\fR uses complete list of supported similarity coefficients: \fIBaroniUrbaniSimilarity, BuserSimilarity,
483 CosineSimilarity, DiceSimilarity, DennisSimilarity, ForbesSimilarity, FossumSimilarity, HamannSimilarity, JacardSimilarity,
484 Kulczynski1Similarity, Kulczynski2Similarity, MatchingSimilarity, McConnaugheySimilarity, OchiaiSimilarity,
485 PearsonSimilarity, RogersTanimotoSimilarity, RussellRaoSimilarity, SimpsonSimilarity, SkoalSneath1Similarity,
486 SkoalSneath2Similarity, SkoalSneath3Similarity, TanimotoSimilarity, TverskySimilarity, YuleSimilarity,
487 WeightedTanimotoSimilarity, WeightedTverskySimilarity\fR. These similarity coefficients are described below.
488 .Sp
489 For two fingerprint bit-vectors A and B of same size, let:
490 .Sp
491 .Vb 4
492 \& Na = Number of bits set to "1" in A
493 \& Nb = Number of bits set to "1" in B
494 \& Nc = Number of bits set to "1" in both A and B
495 \& Nd = Number of bits set to "0" in both A and B
496 \&
497 \& Nt = Number of bits set to "1" or "0" in A or B (Size of A or B)
498 \& Nt = Na + Nb \- Nc + Nd
499 \&
500 \& Na \- Nc = Number of bits set to "1" in A but not in B
501 \& Nb \- Nc = Number of bits set to "1" in B but not in A
502 .Ve
503 .Sp
504 Then, various similarity coefficients [ Ref. 40 \- 42 ] for a pair of bit-vectors A and B are
505 defined as follows:
506 .Sp
507 \&\fIBaroniUrbaniSimilarity\fR: ( \s-1SQRT\s0( Nc * Nd ) + Nc ) / ( \s-1SQRT\s0 ( Nc * Nd ) + Nc + ( Na \- Nc ) + ( Nb \- Nc ) ) ( same as Buser )
508 .Sp
509 \&\fIBuserSimilarity\fR: ( \s-1SQRT\s0 ( Nc * Nd ) + Nc ) / ( \s-1SQRT\s0 ( Nc * Nd ) + Nc + ( Na \- Nc ) + ( Nb \- Nc ) ) ( same as BaroniUrbani )
510 .Sp
511 \&\fICosineSimilarity\fR: Nc / \s-1SQRT\s0 ( Na * Nb ) (same as Ochiai)
512 .Sp
513 \&\fIDiceSimilarity\fR: (2 * Nc) / ( Na + Nb )
514 .Sp
515 \&\fIDennisSimilarity\fR: ( Nc * Nd \- ( ( Na \- Nc ) * ( Nb \- Nc ) ) ) / \s-1SQRT\s0 ( Nt * Na * Nb)
516 .Sp
517 \&\fIForbesSimilarity\fR: ( Nt * Nc ) / ( Na * Nb )
518 .Sp
519 \&\fIFossumSimilarity\fR: ( Nt * ( ( Nc \- 1/2 ) ** 2 ) ) / ( Na * Nb )
520 .Sp
521 \&\fIHamannSimilarity\fR: ( ( Nc + Nd ) \- ( Na \- Nc ) \- ( Nb \- Nc ) ) / Nt
522 .Sp
523 \&\fIJaccardSimilarity\fR: Nc / ( ( Na \- Nc) + ( Nb \- Nc ) + Nc ) = Nc / ( Na + Nb \- Nc ) (same as Tanimoto)
524 .Sp
525 \&\fIKulczynski1Similarity\fR: Nc / ( ( Na \- Nc ) + ( Nb \- Nc) ) = Nc / ( Na + Nb \- 2Nc )
526 .Sp
527 \&\fIKulczynski2Similarity\fR: ( ( Nc / 2 ) * ( 2 * Nc + ( Na \- Nc ) + ( Nb \- Nc) ) ) / ( ( Nc + ( Na \- Nc ) ) * ( Nc + ( Nb \- Nc ) ) ) = 0.5 * ( Nc / Na + Nc / Nb )
528 .Sp
529 \&\fIMatchingSimilarity\fR: ( Nc + Nd ) / Nt
530 .Sp
531 \&\fIMcConnaugheySimilarity\fR: ( Nc ** 2 \- ( Na \- Nc ) * ( Nb \- Nc) ) / ( Na * Nb )
532 .Sp
533 \&\fIOchiaiSimilarity\fR: Nc / \s-1SQRT\s0 ( Na * Nb ) (same as Cosine)
534 .Sp
535 \&\fIPearsonSimilarity\fR: ( ( Nc * Nd ) \- ( ( Na \- Nc ) * ( Nb \- Nc ) ) ) / \s-1SQRT\s0 ( Na * Nb * ( Na \- Nc + Nd ) * ( Nb \- Nc + Nd ) )
536 .Sp
537 \&\fIRogersTanimotoSimilarity\fR: ( Nc + Nd ) / ( ( Na \- Nc) + ( Nb \- Nc) + Nt) = ( Nc + Nd ) / ( Na + Nb \- 2Nc + Nt)
538 .Sp
539 \&\fIRussellRaoSimilarity\fR: Nc / Nt
540 .Sp
541 \&\fISimpsonSimilarity\fR: Nc / \s-1MIN\s0 ( Na, Nb)
542 .Sp
543 \&\fISkoalSneath1Similarity\fR: Nc / ( Nc + 2 * ( Na \- Nc) + 2 * ( Nb \- Nc) ) = Nc / ( 2 * Na + 2 * Nb \- 3 * Nc )
544 .Sp
545 \&\fISkoalSneath2Similarity\fR: ( 2 * Nc + 2 * Nd ) / ( Nc + Nd + Nt )
546 .Sp
547 \&\fISkoalSneath3Similarity\fR: ( Nc + Nd ) / ( ( Na \- Nc ) + ( Nb \- Nc ) ) = ( Nc + Nd ) / ( Na + Nb \- 2 * Nc )
548 .Sp
549 \&\fITanimotoSimilarity\fR: Nc / ( ( Na \- Nc) + ( Nb \- Nc ) + Nc ) = Nc / ( Na + Nb \- Nc ) (same as Jaccard)
550 .Sp
551 \&\fITverskySimilarity\fR: Nc / ( alpha * ( Na \- Nc ) + ( 1 \- alpha) * ( Nb \- Nc) + Nc ) = Nc / ( alpha * ( Na \- Nb ) + Nb)
552 .Sp
553 \&\fIYuleSimilarity\fR: ( ( Nc * Nd ) \- ( ( Na \- Nc ) * ( Nb \- Nc ) ) ) / ( ( Nc * Nd ) + ( ( Na \- Nc ) * ( Nb \- Nc ) ) )
554 .Sp
555 Values of Tanimoto/Jaccard and Tversky coefficients are dependent on only those bits which
556 are set to \*(L"1\*(R" in both A and B. In order to take into account all bit positions, modified versions
557 of Tanimoto [ Ref. 42 ] and Tversky [ Ref. 43 ] have been developed.
558 .Sp
559 Let:
560 .Sp
561 .Vb 3
562 \& Na\*(Aq = Number of bits set to "0" in A
563 \& Nb\*(Aq = Number of bits set to "0" in B
564 \& Nc\*(Aq = Number of bits set to "0" in both A and B
565 .Ve
566 .Sp
567 Tanimoto': Nc' / ( ( Na' \- Nc') + ( Nb' \- Nc' ) + Nc' ) = Nc' / ( Na' + Nb' \- Nc' )
568 .Sp
569 Tversky': Nc' / ( alpha * ( Na' \- Nc' ) + ( 1 \- alpha) * ( Nb' \- Nc' ) + Nc' ) = Nc' / ( alpha * ( Na' \- Nb' ) + Nb')
570 .Sp
571 Then:
572 .Sp
573 \&\fIWeightedTanimotoSimilarity\fR = beta * Tanimoto + (1 \- beta) * Tanimoto'
574 .Sp
575 \&\fIWeightedTverskySimilarity\fR = beta * Tversky + (1 \- beta) * Tversky'
576 .IP "\fB\-c, \-\-ColMode\fR \fIColNum | ColLabel\fR" 4
577 .IX Item "-c, --ColMode ColNum | ColLabel"
578 Specify how columns are identified in \fITextFile(s)\fR: using column number or column
579 label. Possible values: \fIColNum or ColLabel\fR. Default value: \fIColNum\fR.
580 .IP "\fB\-\-CompoundIDCol\fR \fIcol number | col name\fR" 4
581 .IX Item "--CompoundIDCol col number | col name"
582 This value is \fB\-c, \-\-ColMode\fR mode specific. It specifies input \fITextFile(s)\fR column to use for
583 generating compound \s-1ID\s0 for similarity matrices in output \fITextFile(s)\fR. Possible values: \fIcol number
584 or col label\fR. Default value: \fIfirst column containing the word compoundID in its column label or sequentially
585 generated IDs\fR.
586 .IP "\fB\-\-CompoundIDPrefix\fR \fItext\fR" 4
587 .IX Item "--CompoundIDPrefix text"
588 Specify compound \s-1ID\s0 prefix to use during sequential generation of compound IDs for input \fISDFile(s)\fR
589 and \fITextFile(s)\fR. Default value: \fICmpd\fR. The default value generates compound IDs which look
590 like Cmpd<Number>.
591 .Sp
592 For input \fISDFile(s)\fR, this value is only used during \fILabelPrefix | MolNameOrLabelPrefix\fR values
593 of \fB\-\-CompoundIDMode\fR option; otherwise, it's ignored.
594 .Sp
595 Examples for \fILabelPrefix\fR or \fIMolNameOrLabelPrefix\fR value of \fB\-\-CompoundIDMode\fR:
596 .Sp
597 .Vb 1
598 \& Compound
599 .Ve
600 .Sp
601 The value specified above generates compound IDs which correspond to Compound<Number>
602 instead of default value of Cmpd<Number>.
603 .IP "\fB\-\-CompoundIDField\fR \fIDataFieldName\fR" 4
604 .IX Item "--CompoundIDField DataFieldName"
605 Specify input \fISDFile(s)\fR datafield label for generating compound IDs. This value is only used
606 during \fIDataField\fR value of \fB\-\-CompoundIDMode\fR option.
607 .Sp
608 Examples for \fIDataField\fR value of \fB\-\-CompoundIDMode\fR:
609 .Sp
610 .Vb 2
611 \& MolID
612 \& ExtReg
613 .Ve
614 .IP "\fB\-\-CompoundIDMode\fR \fIDataField | MolName | LabelPrefix | MolNameOrLabelPrefix\fR" 4
615 .IX Item "--CompoundIDMode DataField | MolName | LabelPrefix | MolNameOrLabelPrefix"
616 Specify how to generate compound IDs from input \fISDFile(s)\fR for similarity matrix \s-1CSV/TSV\s0 text
617 file(s): use a \fISDFile(s)\fR datafield value; use molname line from \fISDFile(s)\fR; generate a sequential \s-1ID\s0
618 with specific prefix; use combination of both MolName and LabelPrefix with usage of LabelPrefix values
619 for empty molname lines.
620 .Sp
621 Possible values: \fIDataField | MolName | LabelPrefix | MolNameOrLabelPrefix\fR.
622 Default: \fILabelPrefix\fR.
623 .Sp
624 For \fIMolNameOrLabelPrefix\fR value of \fB\-\-CompoundIDMode\fR, molname line in \fISDFile(s)\fR takes
625 precedence over sequential compound IDs generated using \fILabelPrefix\fR and only empty molname
626 values are replaced with sequential compound IDs.
627 .IP "\fB\-d, \-\-detail\fR \fIInfoLevel\fR" 4
628 .IX Item "-d, --detail InfoLevel"
629 Level of information to print about lines being ignored. Default: \fI1\fR. Possible values:
630 \&\fI1, 2 or 3\fR.
631 .IP "\fB\-f, \-\-fast\fR" 4
632 .IX Item "-f, --fast"
633 In this mode, fingerprints columns specified using \fB\-\-FingerprintsCol\fR for \fITextFile(s)\fR and
634 \&\fB\-\-FingerprintsField\fR for \fISDFile(s)\fR are assumed to contain valid fingerprints data and no
635 checking is performed before calculating similarity matrices. By default, fingerprints data is
636 validated before computing pairwise similarity and distance coefficients.
637 .IP "\fB\-\-FingerprintsCol\fR \fIcol number | col name\fR" 4
638 .IX Item "--FingerprintsCol col number | col name"
639 This value is \fB\-c, \-\-ColMode\fR specific. It specifies fingerprints column to use during
640 calculation of similarity matrices for \fITextFile(s)\fR. Possible values: \fIcol number or col label\fR.
641 Default value: \fIfirst column containing the word Fingerprints in its column label\fR.
642 .IP "\fB\-\-FingerprintsField\fR \fIFieldLabel\fR" 4
643 .IX Item "--FingerprintsField FieldLabel"
644 Fingerprints field label to use during calculation of similarity matrices for \fISDFile(s)\fR.
645 Default value: \fIfirst data field label containing the word Fingerprints in its label\fR.
646 .IP "\fB\-h, \-\-help\fR" 4
647 .IX Item "-h, --help"
648 Print this help message.
649 .IP "\fB\-\-InDelim\fR \fIcomma | semicolon\fR" 4
650 .IX Item "--InDelim comma | semicolon"
651 Input delimiter for \s-1CSV\s0 \fITextFile(s)\fR. Possible values: \fIcomma or semicolon\fR.
652 Default value: \fIcomma\fR. For \s-1TSV\s0 files, this option is ignored and \fItab\fR is used as a
653 delimiter.
654 .IP "\fB\-\-InputDataMode\fR \fILoadInMemory | ScanFile\fR" 4
655 .IX Item "--InputDataMode LoadInMemory | ScanFile"
656 Specify how fingerprints bit-vector or vector strings data from \fI\s-1SD\s0, \s-1FP\s0 and \s-1CSV/TSV\s0\fR
657 fingerprint file(s) is processed: Retrieve, process and load all available fingerprints
658 data in memory; Retrieve and process data for fingerprints one at a time. Possible values:
659 \&\fILoadInMemory | ScanFile\fR. Default: \fILoadInMemory\fR.
660 .Sp
661 During \fILoadInMemory\fR value of \fB\-\-InputDataMode\fR, fingerprints bit-vector or vector
662 strings data from input file is retrieved, processed, and loaded into memory all at once
663 as fingerprints objects for generation of similarity matrices.
664 .Sp
665 During \fIScanFile\fR value of \fB\-\-InputDataMode\fR, multiple passes over the input fingerprints
666 file are performed to retrieve and process fingerprints bit-vector or vector strings data one at
667 a time to generate fingerprints objects used during generation of similarity matrices. A temporary
668 copy of the input fingerprints file is made at the start and deleted after generating the matrices.
669 .Sp
670 \&\fIScanFile\fR value of \fB\-\-InputDataMode\fR allows processing of arbitrarily large fingerprints files
671 without any additional memory requirement.
672 .IP "\fB\-m, \-\-mode\fR \fIAutoDetect | FingerprintsBitVectorString | FingerprintsVectorString\fR" 4
673 .IX Item "-m, --mode AutoDetect | FingerprintsBitVectorString | FingerprintsVectorString"
674 Format of fingerprint strings data in \fITextFile(s)\fR: automatically detect format of fingerprints
675 string created by MayaChemTools fingerprints generation scripts or explicitly specify its format.
676 Possible values: \fIAutoDetect | FingerprintsBitVectorString | FingerprintsVectorString\fR. Default
677 value: \fIAutoDetect\fR.
678 .IP "\fB\-\-OutDelim\fR \fIcomma | tab | semicolon\fR" 4
679 .IX Item "--OutDelim comma | tab | semicolon"
680 Delimiter for output \s-1CSV/TSV\s0 text file(s). Possible values: \fIcomma, tab, or semicolon\fR.
681 Default value: \fIcomma\fR.
682 .IP "\fB\-\-OutMatrixFormat\fR \fIRowsAndColumns | IDPairsAndValue\fR" 4
683 .IX Item "--OutMatrixFormat RowsAndColumns | IDPairsAndValue"
684 Specify how similarity or distance values calculated for fingerprints vector and bit-vector strings
685 are written to the output \s-1CSV/TSV\s0 text file(s): Generate text files containing rows and columns
686 with their labels corresponding to compound IDs and each matrix element value corresponding to
687 similarity or distance between corresponding compounds; Generate text files containing rows containing
688 compoundIDs for two compounds followed by similarity or distance value between these compounds.
689 .Sp
690 Possible values: \fIRowsAndColumns, or IDPairsAndValue\fR. Default value: \fIRowsAndColumns\fR.
691 .Sp
692 The value of \fB\-\-OutMatrixFormat\fR in conjunction with \fB\-\-OutMatrixType\fR determines type
693 of data written to output files and allows generation of up to 6 different output data formats:
694 .Sp
695 .Vb 1
696 \& OutMatrixFormat OutMatrixType
697 \&
698 \& RowsAndColumns FullMatrix [ DEFAULT ]
699 \& RowsAndColumns UpperTriangularMatrix
700 \& RowsAndColumns LowerTriangularMatrix
701 \&
702 \& IDPairsAndValue FullMatrix
703 \& IDPairsAndValue UpperTriangularMatrix
704 \& IDPairsAndValue LowerTriangularMatrix
705 .Ve
706 .Sp
707 Example of data in output file for \fIRowsAndColumns\fR \fB\-\-OutMatrixFormat\fR value for
708 \&\fIFullMatrix\fR value of \fB\-\-OutMatrixType\fR:
709 .Sp
710 .Vb 10
711 \& "","Cmpd1","Cmpd2","Cmpd3","Cmpd4","Cmpd5","Cmpd6",... ...
712 \& "Cmpd1","1","0.04","0.25","0.13","0.11","0.2",... ...
713 \& "Cmpd2","0.04","1","0.06","0.05","0.19","0.07",... ...
714 \& "Cmpd3","0.25","0.06","1","0.12","0.22","0.25",... ...
715 \& "Cmpd4","0.13","0.05","0.12","1","0.11","0.13",... ...
716 \& "Cmpd5","0.11","0.19","0.22","0.11","1","0.17",... ...
717 \& "Cmpd6","0.2","0.07","0.25","0.13","0.17","1",... ...
718 \& ... ... ..
719 \& ... ... ..
720 \& ... ... ..
721 .Ve
722 .Sp
723 Example of data in output file for \fIRowsAndColumns\fR \fB\-\-OutMatrixFormat\fR value for
724 \&\fIUpperTriangularMatrix\fR value of \fB\-\-OutMatrixType\fR:
725 .Sp
726 .Vb 10
727 \& "","Cmpd1","Cmpd2","Cmpd3","Cmpd4","Cmpd5","Cmpd6",... ...
728 \& "Cmpd1","1","0.04","0.25","0.13","0.11","0.2",... ...
729 \& "Cmpd2","1","0.06","0.05","0.19","0.07",... ...
730 \& "Cmpd3","1","0.12","0.22","0.25",... ...
731 \& "Cmpd4","1","0.11","0.13",... ...
732 \& "Cmpd5","1","0.17",... ...
733 \& "Cmpd6","1",... ...
734 \& ... ... ..
735 \& ... ... ..
736 \& ... ... ..
737 .Ve
738 .Sp
739 Example of data in output file for \fIRowsAndColumns\fR \fB\-\-OutMatrixFormat\fR value for
740 \&\fILowerTriangularMatrix\fR value of \fB\-\-OutMatrixType\fR:
741 .Sp
742 .Vb 10
743 \& "","Cmpd1","Cmpd2","Cmpd3","Cmpd4","Cmpd5","Cmpd6",... ...
744 \& "Cmpd1","1"
745 \& "Cmpd2","0.04","1"
746 \& "Cmpd3","0.25","0.06","1"
747 \& "Cmpd4","0.13","0.05","0.12","1"
748 \& "Cmpd5","0.11","0.19","0.22","0.11","1"
749 \& "Cmpd6","0.2","0.07","0.25","0.13","0.17","1"
750 \& ... ... ..
751 \& ... ... ..
752 \& ... ... ..
753 .Ve
754 .Sp
755 Example of data in output file for \fIIDPairsAndValue\fR \fB\-\-OutMatrixFormat\fR value for
756 \&\fIFullMatrix\fR value of \fB\-\-OutMatrixType\fR:
757 .Sp
758 .Vb 10
759 \& "CmpdID1","CmpdID2","Coefficient Value"
760 \& "Cmpd1","Cmpd1","1"
761 \& "Cmpd1","Cmpd2","0.04"
762 \& "Cmpd1","Cmpd3","0.25"
763 \& "Cmpd1","Cmpd4","0.13"
764 \& ... ... ...
765 \& ... ... ...
766 \& ... ... ...
767 \& "Cmpd2","Cmpd1","0.04"
768 \& "Cmpd2","Cmpd2","1"
769 \& "Cmpd2","Cmpd3","0.06"
770 \& "Cmpd2","Cmpd4","0.05"
771 \& ... ... ...
772 \& ... ... ...
773 \& ... ... ...
774 \& "Cmpd3","Cmpd1","0.25"
775 \& "Cmpd3","Cmpd2","0.06"
776 \& "Cmpd3","Cmpd3","1"
777 \& "Cmpd3","Cmpd4","0.12"
778 \& ... ... ...
779 \& ... ... ...
780 \& ... ... ...
781 .Ve
782 .Sp
783 Example of data in output file for \fIIDPairsAndValue\fR \fB\-\-OutMatrixFormat\fR value for
784 \&\fIUpperTriangularMatrix\fR value of \fB\-\-OutMatrixType\fR:
785 .Sp
786 .Vb 10
787 \& "CmpdID1","CmpdID2","Coefficient Value"
788 \& "Cmpd1","Cmpd1","1"
789 \& "Cmpd1","Cmpd2","0.04"
790 \& "Cmpd1","Cmpd3","0.25"
791 \& "Cmpd1","Cmpd4","0.13"
792 \& ... ... ...
793 \& ... ... ...
794 \& ... ... ...
795 \& "Cmpd2","Cmpd2","1"
796 \& "Cmpd2","Cmpd3","0.06"
797 \& "Cmpd2","Cmpd4","0.05"
798 \& ... ... ...
799 \& ... ... ...
800 \& ... ... ...
801 \& "Cmpd3","Cmpd3","1"
802 \& "Cmpd3","Cmpd4","0.12"
803 \& ... ... ...
804 \& ... ... ...
805 \& ... ... ...
806 .Ve
807 .Sp
808 Example of data in output file for \fIIDPairsAndValue\fR \fB\-\-OutMatrixFormat\fR value for
809 \&\fILowerTriangularMatrix\fR value of \fB\-\-OutMatrixType\fR:
810 .Sp
811 .Vb 10
812 \& "CmpdID1","CmpdID2","Coefficient Value"
813 \& "Cmpd1","Cmpd1","1"
814 \& "Cmpd2","Cmpd1","0.04"
815 \& "Cmpd2","Cmpd2","1"
816 \& "Cmpd3","Cmpd1","0.25"
817 \& "Cmpd3","Cmpd2","0.06"
818 \& "Cmpd3","Cmpd3","1"
819 \& "Cmpd4","Cmpd1","0.13"
820 \& "Cmpd4","Cmpd2","0.05"
821 \& "Cmpd4","Cmpd3","0.12"
822 \& "Cmpd4","Cmpd4","1"
823 \& ... ... ...
824 \& ... ... ...
825 \& ... ... ...
826 .Ve
827 .IP "\fB\-\-OutMatrixType\fR \fIFullMatrix | UpperTriangularMatrix | LowerTriangularMatrix\fR" 4
828 .IX Item "--OutMatrixType FullMatrix | UpperTriangularMatrix | LowerTriangularMatrix"
829 Type of similarity or distance matrix to calculate for fingerprints vector and bit-vector strings:
830 Calculate full matrix; Calculate lower triangular matrix including diagonal; Calculate upper triangular
831 matrix including diagonal.
832 .Sp
833 Possible values: \fIFullMatrix, UpperTriangularMatrix, or LowerTriangularMatrix\fR. Default value:
834 \&\fIFullMatrix\fR.
835 .Sp
836 The value of \fB\-\-OutMatrixType\fR in conjunction with \fB\-\-OutMatrixFormat\fR determines type
837 of data written to output files.
838 .IP "\fB\-o, \-\-overwrite\fR" 4
839 .IX Item "-o, --overwrite"
840 Overwrite existing files.
841 .IP "\fB\-p, \-\-precision\fR \fInumber\fR" 4
842 .IX Item "-p, --precision number"
843 Precision of calculated values in the output file. Default: up to \fI2\fR decimal places.
844 Valid values: positive integers.
845 .IP "\fB\-q, \-\-quote\fR \fIYes | No\fR" 4
846 .IX Item "-q, --quote Yes | No"
847 Put quote around column values in output \s-1CSV/TSV\s0 text file(s). Possible values:
848 \&\fIYes or No\fR. Default value: \fIYes\fR.
849 .IP "\fB\-r, \-\-root\fR \fIRootName\fR" 4
850 .IX Item "-r, --root RootName"
851 New file name is generated using the root: <Root><BitVectorComparisonMode>.<Ext> or
852 <Root><VectorComparisonMode><VectorComparisonFormulism>.<Ext>.
853 The csv, and tsv <Ext> values are used for comma/semicolon, and tab delimited text files
854 respectively. This option is ignored for multiple input files.
855 .ie n .IP "\fB\-v, \-\-VectorComparisonMode\fR \fIAll | ""TanimotoSimilarity,[ManhattanDistance,...]""\fR" 4
856 .el .IP "\fB\-v, \-\-VectorComparisonMode\fR \fIAll | ``TanimotoSimilarity,[ManhattanDistance,...]''\fR" 4
857 .IX Item "-v, --VectorComparisonMode All | TanimotoSimilarity,[ManhattanDistance,...]"
858 Specify what similarity or distance coefficients to use for calculating similarity matrices for
859 fingerprint vector strings data values in \fITextFile(s)\fR: calculate similarity matrices for all
860 supported similarity and distance coefficients or specify a comma delimited list of similarity
861 and distance coefficients. Possible values: \fIAll | \*(L"TanimotoSimilarity,[ManhattanDistance,...]\*(R"\fR.
862 Default: \fITanimotoSimilarity\fR.
863 .Sp
864 The value of \fB\-v, \-\-VectorComparisonMode\fR, in conjunction with \fB\-\-VectorComparisonFormulism\fR,
865 decides which type of similarity and distance coefficient formulism gets used.
866 .Sp
867 \&\fIAll\fR uses complete list of supported similarity and distance coefficients: \fICosineSimilarity,
868 CzekanowskiSimilarity, DiceSimilarity, OchiaiSimilarity, JaccardSimilarity, SorensonSimilarity, TanimotoSimilarity,
869 CityBlockDistance, EuclideanDistance, HammingDistance, ManhattanDistance, SoergelDistance\fR. These
870 similarity and distance coefficients are described below.
871 .Sp
872 \&\fBFingerprintsVector.pm\fR module, used to calculate similarity and distance coefficients,
873 provides support to perform comparison between vectors containing three different types of
874 values:
875 .Sp
876 Type I: OrderedNumericalValues
877 .Sp
878 .Vb 3
879 \& . Size of two vectors are same
880 \& . Vectors contain real values in a specific order. For example: MACCS keys
881 \& count, Topological pharmacophore atom pairs and so on.
882 .Ve
883 .Sp
884 Type \s-1II:\s0 UnorderedNumericalValues
885 .Sp
886 .Vb 3
887 \& . Size of two vectors might not be same
888 \& . Vectors contain unordered real value identified by value IDs. For example:
889 \& Topological atom pairs, Topological atom torsions and so on
890 .Ve
891 .Sp
892 Type \s-1III:\s0 AlphaNumericalValues
893 .Sp
894 .Vb 3
895 \& . Size of two vectors might not be same
896 \& . Vectors contain unordered alphanumerical values. For example: Extended
897 \& connectivity fingerprints, atom neighborhood fingerprints.
898 .Ve
899 .Sp
900 Before performing similarity or distance calculations between vectors containing UnorderedNumericalValues
901 or AlphaNumericalValues, the vectors are transformed into vectors containing unique OrderedNumericalValues
902 using value IDs for UnorderedNumericalValues and values themselves for AlphaNumericalValues.
903 .Sp
904 Three forms of similarity and distance calculation between two vectors, specified using \fB\-\-VectorComparisonFormulism\fR
905 option, are supported: \fIAlgebraicForm, BinaryForm or SetTheoreticForm\fR.
906 .Sp
907 For \fIBinaryForm\fR, the ordered list of processed final vector values containing the value or
908 count of each unique value type is simply converted into a binary vector containing 1s and 0s
909 corresponding to presence or absence of values before calculating similarity or distance between
910 two vectors.
911 .Sp
912 For two fingerprint vectors A and B of same size containing OrderedNumericalValues, let:
913 .Sp
914 .Vb 1
915 \& N = Number of values in A or B
916 \&
917 \& Xa = Values of vector A
918 \& Xb = Values of vector B
919 \&
920 \& Xai = Value of ith element in A
921 \& Xbi = Value of ith element in B
922 \&
923 \& SUM = Sum of i over N values
924 .Ve
925 .Sp
926 For SetTheoreticForm of calculation between two vectors, let:
927 .Sp
928 .Vb 2
929 \& SetIntersectionXaXb = SUM ( MIN ( Xai, Xbi ) )
930 \& SetDifferenceXaXb = SUM ( Xai ) + SUM ( Xbi ) \- SUM ( MIN ( Xai, Xbi ) )
931 .Ve
932 .Sp
933 For BinaryForm of calculation between two vectors, let:
934 .Sp
935 .Vb 5
936 \& Na = Number of bits set to "1" in A = SUM ( Xai )
937 \& Nb = Number of bits set to "1" in B = SUM ( Xbi )
938 \& Nc = Number of bits set to "1" in both A and B = SUM ( Xai * Xbi )
939 \& Nd = Number of bits set to "0" in both A and B
940 \& = SUM ( 1 \- Xai \- Xbi + Xai * Xbi)
941 \&
942 \& N = Number of bits set to "1" or "0" in A or B = Size of A or B = Na + Nb \- Nc + Nd
943 .Ve
944 .Sp
945 Additionally, for BinaryForm various values also correspond to:
946 .Sp
947 .Vb 4
948 \& Na = | Xa |
949 \& Nb = | Xb |
950 \& Nc = | SetIntersectionXaXb |
951 \& Nd = N \- | SetDifferenceXaXb |
952 \&
953 \& | SetDifferenceXaXb | = N \- Nd = Na + Nb \- Nc + Nd \- Nd = Na + Nb \- Nc
954 \& = | Xa | + | Xb | \- | SetIntersectionXaXb |
955 .Ve
956 .Sp
957 Various similarity and distance coefficients [ Ref 40, Ref 62, Ref 64 ] for a pair of vectors A and B
958 in \fIAlgebraicForm, BinaryForm and SetTheoreticForm\fR are defined as follows:
959 .Sp
960 \&\fBCityBlockDistance\fR: ( same as HammingDistance and ManhattanDistance)
961 .Sp
962 \&\fIAlgebraicForm\fR: \s-1SUM\s0 ( \s-1ABS\s0 ( Xai \- Xbi ) )
963 .Sp
964 \&\fIBinaryForm\fR: ( Na \- Nc ) + ( Nb \- Nc ) = Na + Nb \- 2 * Nc
965 .Sp
966 \&\fISetTheoreticForm\fR: | SetDifferenceXaXb | \- | SetIntersectionXaXb | = \s-1SUM\s0 ( Xai ) + \s-1SUM\s0 ( Xbi ) \- 2 * ( \s-1SUM\s0 ( \s-1MIN\s0 ( Xai, Xbi ) ) )
967 .Sp
968 \&\fBCosineSimilarity\fR: ( same as OchiaiSimilarity)
969 .Sp
970 \&\fIAlgebraicForm\fR: \s-1SUM\s0 ( Xai * Xbi ) / \s-1SQRT\s0 ( \s-1SUM\s0 ( Xai ** 2) * \s-1SUM\s0 ( Xbi ** 2) )
971 .Sp
972 \&\fIBinaryForm\fR: Nc / \s-1SQRT\s0 ( Na * Nb)
973 .Sp
974 \&\fISetTheoreticForm\fR: | SetIntersectionXaXb | / \s-1SQRT\s0 ( |Xa| * |Xb| ) = \s-1SUM\s0 ( \s-1MIN\s0 ( Xai, Xbi ) ) / \s-1SQRT\s0 ( \s-1SUM\s0 ( Xai ) * \s-1SUM\s0 ( Xbi ) )
975 .Sp
976 \&\fBCzekanowskiSimilarity\fR: ( same as DiceSimilarity and SorensonSimilarity)
977 .Sp
978 \&\fIAlgebraicForm\fR: ( 2 * ( \s-1SUM\s0 ( Xai * Xbi ) ) ) / ( \s-1SUM\s0 ( Xai ** 2) + \s-1SUM\s0 ( Xbi **2 ) )
979 .Sp
980 \&\fIBinaryForm\fR: 2 * Nc / ( Na + Nb )
981 .Sp
982 \&\fISetTheoreticForm\fR: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| ) = 2 * ( \s-1SUM\s0 ( \s-1MIN\s0 ( Xai, Xbi ) ) ) / ( \s-1SUM\s0 ( Xai ) + \s-1SUM\s0 ( Xbi ) )
983 .Sp
984 \&\fBDiceSimilarity\fR: ( same as CzekanowskiSimilarity and SorensonSimilarity)
985 .Sp
986 \&\fIAlgebraicForm\fR: ( 2 * ( \s-1SUM\s0 ( Xai * Xbi ) ) ) / ( \s-1SUM\s0 ( Xai ** 2) + \s-1SUM\s0 ( Xbi **2 ) )
987 .Sp
988 \&\fIBinaryForm\fR: 2 * Nc / ( Na + Nb )
989 .Sp
990 \&\fISetTheoreticForm\fR: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| ) = 2 * ( \s-1SUM\s0 ( \s-1MIN\s0 ( Xai, Xbi ) ) ) / ( \s-1SUM\s0 ( Xai ) + \s-1SUM\s0 ( Xbi ) )
991 .Sp
992 \&\fBEuclideanDistance\fR:
993 .Sp
994 \&\fIAlgebraicForm\fR: \s-1SQRT\s0 ( \s-1SUM\s0 ( ( ( Xai \- Xbi ) ** 2 ) ) )
995 .Sp
996 \&\fIBinaryForm\fR: \s-1SQRT\s0 ( ( Na \- Nc ) + ( Nb \- Nc ) ) = \s-1SQRT\s0 ( Na + Nb \- 2 * Nc )
997 .Sp
998 \&\fISetTheoreticForm\fR: \s-1SQRT\s0 ( | SetDifferenceXaXb | \- | SetIntersectionXaXb | ) = \s-1SQRT\s0 ( \s-1SUM\s0 ( Xai ) + \s-1SUM\s0 ( Xbi ) \- 2 * ( \s-1SUM\s0 ( \s-1MIN\s0 ( Xai, Xbi ) ) ) )
999 .Sp
1000 \&\fBHammingDistance\fR: ( same as CityBlockDistance and ManhattanDistance)
1001 .Sp
1002 \&\fIAlgebraicForm\fR: \s-1SUM\s0 ( \s-1ABS\s0 ( Xai \- Xbi ) )
1003 .Sp
1004 \&\fIBinaryForm\fR: ( Na \- Nc ) + ( Nb \- Nc ) = Na + Nb \- 2 * Nc
1005 .Sp
1006 \&\fISetTheoreticForm\fR: | SetDifferenceXaXb | \- | SetIntersectionXaXb | = \s-1SUM\s0 ( Xai ) + \s-1SUM\s0 ( Xbi ) \- 2 * ( \s-1SUM\s0 ( \s-1MIN\s0 ( Xai, Xbi ) ) )
1007 .Sp
1008 \&\fBJaccardSimilarity\fR: ( same as TanimotoSimilarity)
1009 .Sp
1010 \&\fIAlgebraicForm\fR: \s-1SUM\s0 ( Xai * Xbi ) / ( \s-1SUM\s0 ( Xai ** 2 ) + \s-1SUM\s0 ( Xbi ** 2 ) \- \s-1SUM\s0 ( Xai * Xbi ) )
1011 .Sp
1012 \&\fIBinaryForm\fR: Nc / ( ( Na \- Nc ) + ( Nb \- Nc ) + Nc ) = Nc / ( Na + Nb \- Nc )
1013 .Sp
1014 \&\fISetTheoreticForm\fR: | SetIntersectionXaXb | / | SetDifferenceXaXb | = \s-1SUM\s0 ( \s-1MIN\s0 ( Xai, Xbi ) ) / ( \s-1SUM\s0 ( Xai ) + \s-1SUM\s0 ( Xbi ) \- \s-1SUM\s0 ( \s-1MIN\s0 ( Xai, Xbi ) ) )
1015 .Sp
1016 \&\fBManhattanDistance\fR: ( same as CityBlockDistance and HammingDistance)
1017 .Sp
1018 \&\fIAlgebraicForm\fR: \s-1SUM\s0 ( \s-1ABS\s0 ( Xai \- Xbi ) )
1019 .Sp
1020 \&\fIBinaryForm\fR: ( Na \- Nc ) + ( Nb \- Nc ) = Na + Nb \- 2 * Nc
1021 .Sp
1022 \&\fISetTheoreticForm\fR: | SetDifferenceXaXb | \- | SetIntersectionXaXb | = \s-1SUM\s0 ( Xai ) + \s-1SUM\s0 ( Xbi ) \- 2 * ( \s-1SUM\s0 ( \s-1MIN\s0 ( Xai, Xbi ) ) )
1023 .Sp
1024 \&\fBOchiaiSimilarity\fR: ( same as CosineSimilarity)
1025 .Sp
1026 \&\fIAlgebraicForm\fR: \s-1SUM\s0 ( Xai * Xbi ) / \s-1SQRT\s0 ( \s-1SUM\s0 ( Xai ** 2) * \s-1SUM\s0 ( Xbi ** 2) )
1027 .Sp
1028 \&\fIBinaryForm\fR: Nc / \s-1SQRT\s0 ( Na * Nb)
1029 .Sp
1030 \&\fISetTheoreticForm\fR: | SetIntersectionXaXb | / \s-1SQRT\s0 ( |Xa| * |Xb| ) = \s-1SUM\s0 ( \s-1MIN\s0 ( Xai, Xbi ) ) / \s-1SQRT\s0 ( \s-1SUM\s0 ( Xai ) * \s-1SUM\s0 ( Xbi ) )
1031 .Sp
1032 \&\fBSorensonSimilarity\fR: ( same as CzekanowskiSimilarity and DiceSimilarity)
1033 .Sp
1034 \&\fIAlgebraicForm\fR: ( 2 * ( \s-1SUM\s0 ( Xai * Xbi ) ) ) / ( \s-1SUM\s0 ( Xai ** 2) + \s-1SUM\s0 ( Xbi **2 ) )
1035 .Sp
1036 \&\fIBinaryForm\fR: 2 * Nc / ( Na + Nb )
1037 .Sp
1038 \&\fISetTheoreticForm\fR: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| ) = 2 * ( \s-1SUM\s0 ( \s-1MIN\s0 ( Xai, Xbi ) ) ) / ( \s-1SUM\s0 ( Xai ) + \s-1SUM\s0 ( Xbi ) )
1039 .Sp
1040 \&\fBSoergelDistance\fR:
1041 .Sp
1042 \&\fIAlgebraicForm\fR: \s-1SUM\s0 ( \s-1ABS\s0 ( Xai \- Xbi ) ) / \s-1SUM\s0 ( \s-1MAX\s0 ( Xai, Xbi ) )
1043 .Sp
1044 \&\fIBinaryForm\fR: 1 \- Nc / ( Na + Nb \- Nc ) = ( Na + Nb \- 2 * Nc ) / ( Na + Nb \- Nc )
1045 .Sp
1046 \&\fISetTheoreticForm\fR: ( | SetDifferenceXaXb | \- | SetIntersectionXaXb | ) / | SetDifferenceXaXb | = ( \s-1SUM\s0 ( Xai ) + \s-1SUM\s0 ( Xbi ) \- 2 * ( \s-1SUM\s0 ( \s-1MIN\s0 ( Xai, Xbi ) ) ) ) / ( \s-1SUM\s0 ( Xai ) + \s-1SUM\s0 ( Xbi ) \- \s-1SUM\s0 ( \s-1MIN\s0 ( Xai, Xbi ) ) )
1047 .Sp
1048 \&\fBTanimotoSimilarity\fR: ( same as JaccardSimilarity)
1049 .Sp
1050 \&\fIAlgebraicForm\fR: \s-1SUM\s0 ( Xai * Xbi ) / ( \s-1SUM\s0 ( Xai ** 2 ) + \s-1SUM\s0 ( Xbi ** 2 ) \- \s-1SUM\s0 ( Xai * Xbi ) )
1051 .Sp
1052 \&\fIBinaryForm\fR: Nc / ( ( Na \- Nc ) + ( Nb \- Nc ) + Nc ) = Nc / ( Na + Nb \- Nc )
1053 .Sp
1054 \&\fISetTheoreticForm\fR: | SetIntersectionXaXb | / | SetDifferenceXaXb | = \s-1SUM\s0 ( \s-1MIN\s0 ( Xai, Xbi ) ) / ( \s-1SUM\s0 ( Xai ) + \s-1SUM\s0 ( Xbi ) \- \s-1SUM\s0 ( \s-1MIN\s0 ( Xai, Xbi ) ) )
1055 .ie n .IP "\fB\-\-VectorComparisonFormulism\fR \fIAll | ""AlgebraicForm,[BinaryForm,SetTheoreticForm]""\fR" 4
1056 .el .IP "\fB\-\-VectorComparisonFormulism\fR \fIAll | ``AlgebraicForm,[BinaryForm,SetTheoreticForm]''\fR" 4
1057 .IX Item "--VectorComparisonFormulism All | AlgebraicForm,[BinaryForm,SetTheoreticForm]"
1058 Specify fingerprints vector comparison formulism to use for calculation of similarity and distance
1059 coefficients during \fB\-v, \-\-VectorComparisonMode\fR: use all supported comparison formulisms
1060 or specify a comma delimited list of formulisms. Possible values: \fIAll | \*(L"AlgebraicForm,[BinaryForm,SetTheoreticForm]\*(R"\fR.
1061 Default value: \fIAlgebraicForm\fR.
1062 .Sp
1063 \&\fIAll\fR uses all three forms of supported vector comparison formulism for values of \fB\-v, \-\-VectorComparisonMode\fR
1064 option.
1065 .Sp
1066 For fingerprint vector strings containing \fBAlphaNumericalValues\fR data values \- \fBExtendedConnectivityFingerprints\fR,
1067 \&\fBAtomNeighborhoodsFingerprints\fR and so on \- all three formulisms result in the same value during similarity and distance
1068 calculations.
1069 .IP "\fB\-w, \-\-WorkingDir\fR \fIDirName\fR" 4
1070 .IX Item "-w, --WorkingDir DirName"
1071 Location of working directory. Default: current directory.
1072 .SH "EXAMPLES"
1073 .IX Header "EXAMPLES"
1074 To generate a similarity matrix corresponding to Tanimoto similarity coefficient for fingerprints
1075 bit-vector strings data corresponding to supported fingerprints in text file present in a column
1076 name containing Fingerprint substring by loading all fingerprints data into memory and create a
1077 SampleFPHexTanimotoSimilarity.csv file containing compound IDs retrieved from column name
1078 containing CompoundID substring, type:
1079 .PP
1080 .Vb 1
1081 \& % SimilarityMatricesFingerprints.pl \-o SampleFPHex.csv
1082 .Ve
1083 .PP
1084 To generate a similarity matrix corresponding to Tanimoto similarity coefficient for fingerprints
1085 bit-vector strings data corresponding to supported fingerprints in \s-1SD\s0 File present in a data field
1086 with Fingerprint substring in its label by loading all fingerprints data into memory and create a
1087 SampleFPHexTanimotoSimilarity.csv file containing sequentially generated compound IDs with
1088 Cmpd prefix, type:
1089 .PP
1090 .Vb 1
1091 \& % SimilarityMatricesFingerprints.pl \-o SampleFPHex.sdf
1092 .Ve
1093 .PP
1094 To generate a similarity matrix corresponding to Tanimoto similarity coefficient for fingerprints
1095 bit-vector strings data corresponding to supported fingerprints in \s-1FP\s0 file by loading all fingerprints
1096 data into memory and create a SampleFPHexTanimotoSimilarity.csv file along with compound IDs
1097 retrieved from \s-1FP\s0 file, type:
1098 .PP
1099 .Vb 1
1100 \& % SimilarityMatricesFingerprints.pl \-o SampleFPHex.fpf
1101 .Ve
1102 .PP
1103 To generate a lower triangular similarity matrix corresponding to Tanimoto similarity coefficient for
1104 fingerprints bit-vector strings data corresponding to supported fingerprints in text file present in a
1105 column name containing Fingerprint substring by loading all fingerprints data into memory and create
1106 a SampleFPHexTanimotoSimilarity.csv file containing compound IDs retrieved from column name
1107 containing CompoundID substring, type:
1108 .PP
1109 .Vb 3
1110 \& % SimilarityMatricesFingerprints.pl \-o \-\-InputDataMode LoadInMemory
1111 \& \-\-OutMatrixFormat RowsAndColumns \-\-OutMatrixType LowerTriangularMatrix
1112 \& SampleFPHex.csv
1113 .Ve
1114 .PP
1115 To generate an upper triangular similarity matrix corresponding to Tanimoto similarity coefficient for
1116 fingerprints bit-vector strings data corresponding to supported fingerprints in text file present in a
1117 column name containing Fingerprint substring by loading all fingerprints data into memory and create
1118 a SampleFPHexTanimotoSimilarity.csv file in IDPairsAndValue format containing compound IDs retrieved
1119 from column name containing CompoundID substring, type:
1120 .PP
1121 .Vb 3
1122 \& % SimilarityMatricesFingerprints.pl \-o \-\-InputDataMode LoadInMemory
1123 \& \-\-OutMatrixFormat IDPairsAndValue \-\-OutMatrixType UpperTriangularMatrix
1124 \& SampleFPHex.csv
1125 .Ve
1126 .PP
1127 To generate a full similarity matrix corresponding to Tanimoto similarity coefficient for fingerprints
1128 bit-vector strings data corresponding to supported fingerprints in text file present in a column
1129 name containing Fingerprint substring by scanning file without loading all fingerprints data into memory
1130 and create a SampleFPHexTanimotoSimilarity.csv file containing compound IDs retrieved from
1131 column name containing CompoundID substring, type:
1132 .PP
1133 .Vb 3
1134 \& % SimilarityMatricesFingerprints.pl \-o \-\-InputDataMode ScanFile
1135 \& \-\-OutMatrixFormat RowsAndColumns \-\-OutMatrixType FullMatrix
1136 \& SampleFPHex.csv
1137 .Ve
1138 .PP
1139 To generate a lower triangular similarity matrix corresponding to Tanimoto similarity coefficient for
1140 fingerprints bit-vector strings data corresponding to supported fingerprints in text file present in a
1141 column name containing Fingerprint substring by scanning file without loading all fingerprints data into
1142 memory and create a SampleFPHexTanimotoSimilarity.csv file in IDPairsAndValue format containing
1143 compound IDs retrieved from column name containing CompoundID substring, type:
1144 .PP
1145 .Vb 3
1146 \& % SimilarityMatricesFingerprints.pl \-o \-\-InputDataMode ScanFile
1147 \& \-\-OutMatrixFormat IDPairsAndValue \-\-OutMatrixType LowerTriangularMatrix
1148 \& SampleFPHex.csv
1149 .Ve
1150 .PP
1151 To generate a similarity matrix corresponding to Tanimoto similarity coefficient using algebraic formulism
1152 for fingerprints vector strings data corresponding to supported fingerprints in text file present in a column name
1153 containing Fingerprint substring and create a SampleFPCountTanimotoSimilarityAlgebraicForm.csv file
1154 containing compound IDs retrieved from column name containing CompoundID substring, type:
1155 .PP
1156 .Vb 1
1157 \& % SimilarityMatricesFingerprints.pl \-o SampleFPCount.csv
1158 .Ve
1159 .PP
1160 To generate a similarity matrix corresponding to Tanimoto similarity coefficient using algebraic formulism
1161 for fingerprints vector strings data corresponding to supported fingerprints in \s-1SD\s0 file present in a data field with
1162 Fingerprint substring in its label and create a SampleFPCountTanimotoSimilarityAlgebraicForm.csv file
1163 containing sequentially generated compound IDs with Cmpd prefix, type:
1164 .PP
1165 .Vb 1
1166 \& % SimilarityMatricesFingerprints.pl \-o SampleFPCount.sdf
1167 .Ve
1168 .PP
1169 To generate a similarity matrix corresponding to Tanimoto similarity coefficient using algebraic formulism
1170 for fingerprints vector strings data corresponding to supported fingerprints in \s-1FP\s0 file and create a
1171 SampleFPCountTanimotoSimilarityAlgebraicForm.csv file along with compound IDs retrieved from \s-1FP\s0 file, type:
1172 .PP
1173 .Vb 1
1174 \& % SimilarityMatricesFingerprints.pl \-o SampleFPCount.fpf
1175 .Ve
1176 .PP
1177 To generate a similarity matrix corresponding to Tanimoto similarity coefficient for fingerprints
1178 bit-vector strings data corresponding to supported fingerprints in text file present in a column name
1179 containing Fingerprint substring and create a SampleFPHexTanimotoSimilarity.csv file in
1180 IDPairsAndValue format containing compound IDs retrieved from column name containing
1181 CompoundID substring, type:
1182 .PP
1183 .Vb 2
1184 \& % SimilarityMatricesFingerprints.pl \-\-OutMatrixFormat IDPairsAndValue \-o
1185 \& SampleFPHex.csv
1186 .Ve
1187 .PP
1188 To generate a similarity matrix corresponding to Tanimoto similarity coefficient for fingerprints
1189 bit-vector strings data corresponding to supported fingerprints in \s-1SD\s0 file present in a data field with
1190 Fingerprint substring in its label and create a SampleFPHexTanimotoSimilarity.csv file in
1191 IDPairsAndValue format containing sequentially generated compound IDs with Cmpd prefix,
1192 type:
1193 .PP
1194 .Vb 2
1195 \& % SimilarityMatricesFingerprints.pl \-\-OutMatrixFormat IDPairsAndValue \-o
1196 \& SampleFPHex.sdf
1197 .Ve
1198 .PP
1199 To generate a similarity matrix corresponding to Tanimoto similarity coefficient for fingerprints
1200 bit-vector strings data corresponding to supported fingerprints in \s-1FP\s0 file and create a
1201 SampleFPHexTanimotoSimilarity.csv file in IDPairsAndValue format along with compound IDs retrieved
1202 from \s-1FP\s0 file, type:
1203 .PP
1204 .Vb 2
1205 \& % SimilarityMatricesFingerprints.pl \-\-OutMatrixFormat IDPairsAndValue \-o
1206 \& SampleFPHex.fpf
1207 .Ve
1208 .PP
1209 To generate a similarity matrix corresponding to Tanimoto similarity coefficient for fingerprints
1210 bit-vector strings data corresponding to supported fingerprints in \s-1SD\s0 file present in a data field with
1211 Fingerprint substring in its label and create a SampleFPHexTanimotoSimilarity.csv file
1212 containing compound IDs from mol name line, type:
1213 .PP
1214 .Vb 2
1215 \& % SimilarityMatricesFingerprints.pl \-\-CompoundIDMode MolName \-o
1216 \& SampleFPHex.sdf
1217 .Ve
1218 .PP
1219 To generate a similarity matrix corresponding to Tanimoto similarity coefficient for fingerprints
1220 bit-vector strings data corresponding to supported fingerprints present in a data field with
1221 Fingerprint substring in its label and create a SampleFPBinTanimotoSimilarity.csv file
1222 containing compound IDs from data field name Mol_ID, type:
1223 .PP
1224 .Vb 2
1225 \& % SimilarityMatricesFingerprints.pl \-\-CompoundIDMode DataField
1226 \& \-\-CompoundIDField Mol_ID \-o SampleFPBin.sdf
1227 .Ve
1228 .PP
1229 To generate similarity matrices corresponding to Buser, Dice and Tanimoto similarity coefficients
1230 for fingerprints bit-vector strings data corresponding to supported fingerprints present in a column
1231 name containing Fingerprint substring and create SampleFPBin[CoefficientName]Similarity.csv files
1232 containing compound IDs retrieved from column name containing CompoundID substring, type:
1233 .PP
1234 .Vb 2
1235 \& % SimilarityMatricesFingerprints.pl \-b "BuserSimilarity,DiceSimilarity,
1236 \& TanimotoSimilarity" \-o SampleFPBin.csv
1237 .Ve
1238 .PP
1239 To generate similarity matrices corresponding to Buser, Dice and Tanimoto similarity coefficients
1240 for fingerprints bit-vector strings data corresponding to supported fingerprints present in a data field with
1241 Fingerprint substring in its label and create SampleFPBin[CoefficientName]Similarity.csv files
1242 containing sequentially generated compound IDs with Cmpd prefix, type:
1243 .PP
1244 .Vb 2
1245 \& % SimilarityMatricesFingerprints.pl \-b "BuserSimilarity,DiceSimilarity,
1246 \& TanimotoSimilarity" \-o SampleFPBin.sdf
1247 .Ve
1248 .PP
1249 To generate similarity matrices corresponding to CityBlock distance and Tanimoto similarity coefficients using
1250 algebraic formulism for fingerprints vector strings data corresponding to supported fingerprints present in
1251 a column name containing Fingerprint substring and create SampleFPCount[CoefficientName]AlgebraicForm.csv
1252 files containing compound IDs retrieved from column name containing CompoundID substring, type:
1253 .PP
1254 .Vb 2
1255 \& % SimilarityMatricesFingerprints.pl \-v "CityBlockDistance,
1256 \& TanimotoSimilarity" \-o SampleFPCount.csv
1257 .Ve
1258 .PP
1259 To generate similarity matrices corresponding to CityBlock distance and Tanimoto similarity coefficients using
1260 algebraic formulism for fingerprints vector strings data corresponding to supported fingerprints present in
1261 a data field with Fingerprint substring in its label and create SampleFPCount[CoefficientName]AlgebraicForm.csv
1262 files containing sequentially generated compound IDs with Cmpd prefix, type:
1263 .PP
1264 .Vb 2
1265 \& % SimilarityMatricesFingerprints.pl \-v "CityBlockDistance,
1266 \& TanimotoSimilarity" \-o SampleFPCount.sdf
1267 .Ve
1268 .PP
1269 To generate similarity matrices corresponding to CityBlock distance and Tanimoto similarity coefficients using
1270 binary formulism for fingerprints vector strings data corresponding to supported fingerprints present in
1271 a column name containing Fingerprint substring and create SampleFPCount[CoefficientName]Binary.csv
1272 files containing compound IDs retrieved from column name containing CompoundID substring, type:
1273 .PP
1274 .Vb 3
1275 \& % SimilarityMatricesFingerprints.pl \-v "CityBlockDistance,
1276 \& TanimotoSimilarity" \-\-VectorComparisonFormulism BinaryForm \-o
1277 \& SampleFPCount.csv
1278 .Ve
1279 .PP
1280 To generate similarity matrices corresponding to CityBlock distance and Tanimoto similarity coefficients using
1281 binary formulism for fingerprints vector strings data corresponding to supported fingerprints present in
1282 a data field with Fingerprint substring in its label and create SampleFPCount[CoefficientName]Binary.csv
1283 files containing sequentially generated compound IDs with Cmpd prefix, type:
1284 .PP
1285 .Vb 3
1286 \& % SimilarityMatricesFingerprints.pl \-v "CityBlockDistance,
1287 \& TanimotoSimilarity" \-\-VectorComparisonFormulism BinaryForm \-o
1288 \& SampleFPCount.sdf
1289 .Ve
1290 .PP
1291 To generate similarity matrices corresponding to CityBlock distance and Tanimoto similarity coefficients using
1292 all supported comparison formulisms for fingerprints vector strings data corresponding to supported
1293 fingerprints present in a column name containing Fingerprint substring and create
1294 SampleFPCount[CoefficientName][FormulismName].csv files containing compound IDs retrieved from column
1295 name containing CompoundID substring, type:
1296 .PP
1297 .Vb 2
1298 \& % SimilarityMatricesFingerprints.pl \-v "CityBlockDistance,
1299 \& TanimotoSimilarity" \-\-VectorComparisonFormulism All \-o SampleFPCount.csv
1300 .Ve
1301 .PP
1302 To generate similarity matrices corresponding to CityBlock distance and Tanimoto similarity coefficients using
1303 all supported comparison formulisms for fingerprints vector strings data corresponding to supported
1304 fingerprints present in a data field with Fingerprint substring in its label and create
1305 SampleFPCount[CoefficientName][FormulismName].csv files containing sequentially generated
1306 compound IDs with Cmpd prefix, type:
1307 .PP
1308 .Vb 2
1309 \& % SimilarityMatricesFingerprints.pl \-v "CityBlockDistance,TanimotoSimilarity"
1310 \& \-\-VectorComparisonFormulism All \-o SampleFPCount.sdf
1311 .Ve
1312 .PP
1313 To generate similarity matrices corresponding to all available similarity coefficients for fingerprints
1314 bit-vector strings data corresponding to supported fingerprints present in a column name
1315 containing Fingerprint substring and create SampleFPHex[CoefficientName].csv files
1316 containing compound IDs retrieved from column name containing CompoundID substring, type:
1317 .PP
1318 .Vb 2
1319 \& % SimilarityMatricesFingerprints.pl \-m AutoDetect \-\-BitVectorComparisonMode
1320 \& All \-\-alpha 0.5 \-\-beta 0.5 \-o SampleFPHex.csv
1321 .Ve
1322 .PP
1323 To generate similarity matrices corresponding to all available similarity coefficients for fingerprints
1324 bit-vector strings data corresponding to supported fingerprints present in a data field with Fingerprint
1325 substring in its label and create SampleFPHex[CoefficientName].csv files containing sequentially
1326 generated compound IDs with Cmpd prefix, type:
1327 .PP
1328 .Vb 2
1329 \& % SimilarityMatricesFingerprints.pl \-m AutoDetect \-\-BitVectorComparisonMode
1330 \& All \-\-alpha 0.5 \-\-beta 0.5 \-o SampleFPHex.sdf
1331 .Ve
1332 .PP
1333 To generate similarity matrices corresponding to all available similarity and distance coefficients using
1334 all comparison formulisms for fingerprints vector strings data corresponding to supported fingerprints
1335 present in a column name containing Fingerprint substring and create
1336 SampleFPCount[CoefficientName][FormulismName].csv files containing compound IDs
1337 retrieved from column name containing CompoundID substring, type:
1338 .PP
1339 .Vb 2
1340 \& % SimilarityMatricesFingerprints.pl \-m AutoDetect \-\-VectorComparisonMode
1341 \& All \-\-VectorComparisonFormulism All \-o SampleFPCount.csv
1342 .Ve
1343 .PP
1344 To generate similarity matrices corresponding to all available similarity and distance coefficients using
1345 all comparison formulisms for fingerprints vector strings data corresponding to supported fingerprints
1346 present in a data field with Fingerprint substring in its label and create
1347 SampleFPCount[CoefficientName][FormulismName].csv files containing sequentially generated
1348 compound IDs with Cmpd prefix, type:
1349 .PP
1350 .Vb 2
1351 \& % SimilarityMatricesFingerprints.pl \-m AutoDetect \-\-VectorComparisonMode
1352 \& All \-\-VectorComparisonFormulism All \-o SampleFPCount.sdf
1353 .Ve
1354 .PP
1355 To generate a similarity matrix corresponding to Tanimoto similarity coefficient for fingerprints
1356 bit-vector strings data corresponding to supported fingerprints present in column number 2
1357 and create a SampleFPHexTanimotoSimilarity.csv file containing compound IDs retrieved from column
1358 number 1, type:
1359 .PP
1360 .Vb 2
1361 \& % SimilarityMatricesFingerprints.pl \-\-ColMode ColNum \-\-CompoundIDCol 1
1362 \& \-\-FingerprintsCol 2 \-o SampleFPHex.csv
1363 .Ve
1364 .PP
1365 To generate a similarity matrix corresponding to Tanimoto similarity coefficient for fingerprints
1366 bit-vector strings data corresponding to supported fingerprints present in a data field name
1367 Fingerprints and create a SampleFPHexTanimotoSimilarity.csv file containing compound IDs
1368 present in data field name Mol_ID, type:
1369 .PP
1370 .Vb 2
1371 \& % SimilarityMatricesFingerprints.pl \-\-FingerprintsField Fingerprints
1372 \& \-\-CompoundIDMode DataField \-\-CompoundIDField Mol_ID \-o SampleFPHex.sdf
1373 .Ve
1374 .PP
1375 To generate a similarity matrix corresponding to Tversky similarity coefficient for fingerprints
1376 bit-vector strings data corresponding to supported fingerprints present in a column named Fingerprints
1377 and create a SampleFPHexTverskySimilarity.tsv file containing compound IDs retrieved from column named
1378 CompoundID, type:
1379 .PP
1380 .Vb 4
1381 \& % SimilarityMatricesFingerprints.pl \-\-BitVectorComparisonMode
1382 \& TverskySimilarity \-\-alpha 0.5 \-\-ColMode ColLabel \-\-CompoundIDCol
1383 \& CompoundID \-\-FingerprintsCol Fingerprints \-\-OutDelim Tab \-\-quote No
1384 \& \-o SampleFPHex.csv
1385 .Ve
1386 .PP
1387 To generate a similarity matrix corresponding to Tanimoto similarity coefficient for fingerprints
1388 bit-vector strings data corresponding to supported fingerprints present in a data field
1389 with Fingerprint substring in its label and create a SampleFPHexTanimotoSimilarity.csv file
1390 containing compound IDs from molname line or sequentially generated compound IDs
1391 with Mol prefix, type:
1392 .PP
1393 .Vb 2
1394 \& % SimilarityMatricesFingerprints.pl \-\-CompoundIDMode MolnameOrLabelPrefix
1395 \& \-\-CompoundIDPrefix Mol \-o SampleFPHex.sdf
1396 .Ve
1397 .PP
1398 To generate a similarity matrix corresponding to Tanimoto similarity coefficient for fingerprints
1399 bit-vector strings data corresponding to supported fingerprints present in a data field with
1400 Fingerprint substring in its label and create a SampleFPHexTanimotoSimilarity.tsv file
1401 containing sequentially generated compound IDs with Cmpd prefix, type:
1402 .PP
1403 .Vb 1
1404 \& % SimilarityMatricesFingerprints.pl \-\-OutDelim Tab \-\-quote No \-o SampleFPHex.sdf
1405 .Ve
1406 .SH "AUTHOR"
1407 .IX Header "AUTHOR"
1408 Manish Sud <msud@san.rr.com>
1409 .SH "SEE ALSO"
1410 .IX Header "SEE ALSO"
1411 InfoFingerprintsFiles.pl, SimilaritySearchingFingerprints.pl, AtomNeighborhoodsFingerprints.pl,
1412 ExtendedConnectivityFingerprints.pl, MACCSKeysFingerprints.pl, PathLengthFingerprints.pl,
1413 TopologicalAtomPairsFingerprints.pl, TopologicalAtomTorsionsFingerprints.pl,
1414 TopologicalPharmacophoreAtomPairsFingerprints.pl, TopologicalPharmacophoreAtomTripletsFingerprints.pl
1415 .SH "COPYRIGHT"
1416 .IX Header "COPYRIGHT"
1417 Copyright (C) 2015 Manish Sud. All rights reserved.
1418 .PP
1419 This file is part of MayaChemTools.
1420 .PP
1421 MayaChemTools is free software; you can redistribute it and/or modify it under
1422 the terms of the \s-1GNU\s0 Lesser General Public License as published by the Free
1423 Software Foundation; either version 3 of the License, or (at your option)
1424 any later version.