Mercurial > repos > deepakjadmin > mayatool3_test2
comparison docs/scripts/man1/MACCSKeysFingerprints.1 @ 0:4816e4a8ae95 draft default tip
Uploaded
| author | deepakjadmin |
|---|---|
| date | Wed, 20 Jan 2016 09:23:18 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4816e4a8ae95 |
|---|---|
| 1 .\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.22) | |
| 2 .\" | |
| 3 .\" Standard preamble: | |
| 4 .\" ======================================================================== | |
| 5 .de Sp \" Vertical space (when we can't use .PP) | |
| 6 .if t .sp .5v | |
| 7 .if n .sp | |
| 8 .. | |
| 9 .de Vb \" Begin verbatim text | |
| 10 .ft CW | |
| 11 .nf | |
| 12 .ne \\$1 | |
| 13 .. | |
| 14 .de Ve \" End verbatim text | |
| 15 .ft R | |
| 16 .fi | |
| 17 .. | |
| 18 .\" Set up some character translations and predefined strings. \*(-- will | |
| 19 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
| 20 .\" double quote, and \*(R" will give a right double quote. \*(C+ will | |
| 21 .\" give a nicer C++. Capital omega is used to do unbreakable dashes and | |
| 22 .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, | |
| 23 .\" nothing in troff, for use with C<>. | |
| 24 .tr \(*W- | |
| 25 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' | |
| 26 .ie n \{\ | |
| 27 . ds -- \(*W- | |
| 28 . ds PI pi | |
| 29 . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
| 30 . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
| 31 . ds L" "" | |
| 32 . ds R" "" | |
| 33 . ds C` "" | |
| 34 . ds C' "" | |
| 35 'br\} | |
| 36 .el\{\ | |
| 37 . ds -- \|\(em\| | |
| 38 . ds PI \(*p | |
| 39 . ds L" `` | |
| 40 . ds R" '' | |
| 41 'br\} | |
| 42 .\" | |
| 43 .\" Escape single quotes in literal strings from groff's Unicode transform. | |
| 44 .ie \n(.g .ds Aq \(aq | |
| 45 .el .ds Aq ' | |
| 46 .\" | |
| 47 .\" If the F register is turned on, we'll generate index entries on stderr for | |
| 48 .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index | |
| 49 .\" entries marked with X<> in POD. Of course, you'll have to process the | |
| 50 .\" output yourself in some meaningful fashion. | |
| 51 .ie \nF \{\ | |
| 52 . de IX | |
| 53 . tm Index:\\$1\t\\n%\t"\\$2" | |
| 54 .. | |
| 55 . nr % 0 | |
| 56 . rr F | |
| 57 .\} | |
| 58 .el \{\ | |
| 59 . de IX | |
| 60 .. | |
| 61 .\} | |
| 62 .\" | |
| 63 .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). | |
| 64 .\" Fear. Run. Save yourself. No user-serviceable parts. | |
| 65 . \" fudge factors for nroff and troff | |
| 66 .if n \{\ | |
| 67 . ds #H 0 | |
| 68 . ds #V .8m | |
| 69 . ds #F .3m | |
| 70 . ds #[ \f1 | |
| 71 . ds #] \fP | |
| 72 .\} | |
| 73 .if t \{\ | |
| 74 . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
| 75 . ds #V .6m | |
| 76 . ds #F 0 | |
| 77 . ds #[ \& | |
| 78 . ds #] \& | |
| 79 .\} | |
| 80 . \" simple accents for nroff and troff | |
| 81 .if n \{\ | |
| 82 . ds ' \& | |
| 83 . ds ` \& | |
| 84 . ds ^ \& | |
| 85 . ds , \& | |
| 86 . ds ~ ~ | |
| 87 . ds / | |
| 88 .\} | |
| 89 .if t \{\ | |
| 90 . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
| 91 . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
| 92 . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
| 93 . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
| 94 . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
| 95 . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
| 96 .\} | |
| 97 . \" troff and (daisy-wheel) nroff accents | |
| 98 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
| 99 .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
| 100 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
| 101 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
| 102 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
| 103 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
| 104 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
| 105 .ds ae a\h'-(\w'a'u*4/10)'e | |
| 106 .ds Ae A\h'-(\w'A'u*4/10)'E | |
| 107 . \" corrections for vroff | |
| 108 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
| 109 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
| 110 . \" for low resolution devices (crt and lpr) | |
| 111 .if \n(.H>23 .if \n(.V>19 \ | |
| 112 \{\ | |
| 113 . ds : e | |
| 114 . ds 8 ss | |
| 115 . ds o a | |
| 116 . ds d- d\h'-1'\(ga | |
| 117 . ds D- D\h'-1'\(hy | |
| 118 . ds th \o'bp' | |
| 119 . ds Th \o'LP' | |
| 120 . ds ae ae | |
| 121 . ds Ae AE | |
| 122 .\} | |
| 123 .rm #[ #] #H #V #F C | |
| 124 .\" ======================================================================== | |
| 125 .\" | |
| 126 .IX Title "MACCSKEYSFINGERPRINTS 1" | |
| 127 .TH MACCSKEYSFINGERPRINTS 1 "2015-03-29" "perl v5.14.2" "MayaChemTools" | |
| 128 .\" For nroff, turn off justification. Always turn off hyphenation; it makes | |
| 129 .\" way too many mistakes in technical documents. | |
| 130 .if n .ad l | |
| 131 .nh | |
| 132 .SH "NAME" | |
| 133 MACCSKeysFingerprints.pl \- Generate MACCS key fingerprints for SD files | |
| 134 .SH "SYNOPSIS" | |
| 135 .IX Header "SYNOPSIS" | |
| 136 MACCSKeysFingerprints.pl SDFile(s)... | |
| 137 .PP | |
| 138 MACCSKeysFingerprints.pl [\fB\-\-AromaticityModel\fR \fIAromaticityModelType\fR] | |
| 139 [\fB\-\-BitsOrder\fR \fIAscending | Descending\fR] | |
| 140 [\fB\-b, \-\-BitStringFormat\fR \fIBinaryString | HexadecimalString\fR] | |
| 141 [\fB\-\-CompoundID\fR \fIDataFieldName or LabelPrefixString\fR] [\fB\-\-CompoundIDLabel\fR \fItext\fR] | |
| 142 [\fB\-\-CompoundIDMode\fR \fIDataField | MolName | LabelPrefix | MolNameOrLabelPrefix\fR] | |
| 143 [\fB\-\-DataFields\fR \fI\*(L"FieldLabel1,FieldLabel2,...\*(R"\fR] [\fB\-d, \-\-DataFieldsMode\fR \fIAll | Common | Specify | CompoundID\fR] | |
| 144 [\fB\-f, \-\-Filter\fR \fIYes | No\fR] [\fB\-\-FingerprintsLabel\fR \fItext\fR] [\fB\-h, \-\-help\fR] [\fB\-k, \-\-KeepLargestComponent\fR \fIYes | No\fR] | |
| 145 [\fB\-m, \-\-mode\fR \fIMACCSKeyBits | MACCSKeyCount\fR] [\fB\-\-OutDelim\fR \fIcomma | tab | semicolon\fR] | |
| 146 [\fB\-\-output\fR \fI\s-1SD\s0 | \s-1FP\s0 | text | all\fR] [\fB\-o, \-\-overwrite\fR] | |
| 147 [\fB\-q, \-\-quote\fR \fIYes | No\fR] [\fB\-r, \-\-root\fR \fIRootName\fR] [\fB\-s, \-\-size\fR \fInumber\fR] | |
| 148 [\fB\-v, \-\-VectorStringFormat\fR \fIIDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString\fR] | |
| 149 [\fB\-w, \-\-WorkingDir\fR \fIDirName\fR] | |
| 150 .SH "DESCRIPTION" | |
| 151 .IX Header "DESCRIPTION" | |
| 152 Generate \s-1MACCS\s0 (Molecular ACCess System) keys fingerprints [ Ref 45\-47 ] for \fISDFile(s)\fR | |
| 153 and create appropriate \s-1SD\s0, \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) containing fingerprints bit-vector or | |
| 154 vector strings corresponding to molecular fingerprints. | |
| 155 .PP | |
| 156 Multiple SDFile names are separated by spaces. The valid file extensions are \fI.sdf\fR | |
| 157 and \fI.sd\fR. All other file names are ignored. All the \s-1SD\s0 files in the current directory | |
| 158 can be specified either by \fI*.sdf\fR or the current directory name. | |
| 159 .PP | |
| 160 For each \s-1MACCS\s0 keys definition, atoms are processed to determine their membership to the key | |
| 161 and the appropriate molecular fingerprints strings are generated. An atom can belong to multiple | |
| 162 \&\s-1MACCS\s0 keys. | |
| 163 .PP | |
| 164 For \fIMACCSKeyBits\fR value of \fB\-m, \-\-mode\fR option, a fingerprint bit-vector string containing | |
| 165 zeros and ones is generated and for \fIMACCSKeyCount\fR value, a fingerprint vector string | |
| 166 corresponding to number of \s-1MACCS\s0 keys [ Ref 45\-47 ] is generated. | |
| 167 .PP | |
| 168 \&\fIMACCSKeyBits | MACCSKeyCount\fR values for \fB\-m, \-\-mode\fR option along with two possible | |
| 169 \&\fI166 | 322\fR values of \fB\-s, \-\-size\fR support generation of four different types of \s-1MACCS\s0 | |
| 170 keys fingerprint: \fIMACCS166KeyBits, MACCS166KeyCount, MACCS322KeyBits, MACCS322KeyCount\fR. | |
| 171 .PP | |
| 172 Example of \fI\s-1SD\s0\fR file containing \s-1MACCS\s0 keys fingerprints string data: | |
| 173 .PP | |
| 174 .Vb 10 | |
| 175 \& ... ... | |
| 176 \& ... ... | |
| 177 \& $$$$ | |
| 178 \& ... ... | |
| 179 \& ... ... | |
| 180 \& ... ... | |
| 181 \& 41 44 0 0 0 0 0 0 0 0999 V2000 | |
| 182 \& \-3.3652 1.4499 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 | |
| 183 \& ... ... | |
| 184 \& 2 3 1 0 0 0 0 | |
| 185 \& ... ... | |
| 186 \& M END | |
| 187 \& > <CmpdID> | |
| 188 \& Cmpd1 | |
| 189 \& | |
| 190 \& > <MACCSKeysFingerprints> | |
| 191 \& FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;000000000 | |
| 192 \& 00000000000000000000000000000000100100001001000000001001000000001110001 | |
| 193 \& 00101010111100011011000100110110000011011110100110111111111111011111111 | |
| 194 \& 11111111110111000 | |
| 195 \& | |
| 196 \& $$$$ | |
| 197 \& ... ... | |
| 198 \& ... ... | |
| 199 .Ve | |
| 200 .PP | |
| 201 Example of \fI\s-1FP\s0\fR file containing \s-1MACCS\s0 keys fingerprints string data: | |
| 202 .PP | |
| 203 .Vb 10 | |
| 204 \& # | |
| 205 \& # Package = MayaChemTools 7.4 | |
| 206 \& # Release Date = Oct 21, 2010 | |
| 207 \& # | |
| 208 \& # TimeStamp = Fri Mar 11 14:57:24 2011 | |
| 209 \& # | |
| 210 \& # FingerprintsStringType = FingerprintsBitVector | |
| 211 \& # | |
| 212 \& # Description = MACCSKeyBits | |
| 213 \& # Size = 166 | |
| 214 \& # BitStringFormat = BinaryString | |
| 215 \& # BitsOrder = Ascending | |
| 216 \& # | |
| 217 \& Cmpd1 00000000000000000000000000000000000000000100100001001000000001... | |
| 218 \& Cmpd2 00000000000000000000000010000000001000000010000000001000000000... | |
| 219 \& ... ... | |
| 220 \& ... .. | |
| 221 .Ve | |
| 222 .PP | |
| 223 Example of \s-1CSV\s0 \fIText\fR file containing \s-1MACCS\s0 keys fingerprints string data: | |
| 224 .PP | |
| 225 .Vb 7 | |
| 226 \& "CompoundID","MACCSKeysFingerprints" | |
| 227 \& "Cmpd1","FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending; | |
| 228 \& 00000000000000000000000000000000000000000100100001001000000001001000000 | |
| 229 \& 00111000100101010111100011011000100110110000011011110100110111111111111 | |
| 230 \& 01111111111111111110111000" | |
| 231 \& ... ... | |
| 232 \& ... ... | |
| 233 .Ve | |
| 234 .PP | |
| 235 The current release of MayaChemTools generates the following types of \s-1MACCS\s0 keys | |
| 236 fingerprints bit-vector and vector strings: | |
| 237 .PP | |
| 238 .Vb 4 | |
| 239 \& FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000 | |
| 240 \& 0000000000000000000000000000000001001000010010000000010010000000011100 | |
| 241 \& 0100101010111100011011000100110110000011011110100110111111111111011111 | |
| 242 \& 11111111111110111000 | |
| 243 \& | |
| 244 \& FingerprintsBitVector;MACCSKeyBits;166;HexadecimalString;Ascending;000 | |
| 245 \& 000000021210210e845f8d8c60b79dffbffffd1 | |
| 246 \& | |
| 247 \& FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011 | |
| 248 \& 1110011111100101111111000111101100110000000000000011100010000000000000 | |
| 249 \& 0000000000000000000000000000000000000000000000101000000000000000000000 | |
| 250 \& 0000000000000000000000000000000000000000000000000000000000000000000000 | |
| 251 \& 0000000000000000000000000000000000000011000000000000000000000000000000 | |
| 252 \& 0000000000000000000000000000000000000000 | |
| 253 \& | |
| 254 \& FingerprintsBitVector;MACCSKeyBits;322;HexadecimalString;Ascending;7d7 | |
| 255 \& e7af3edc000c1100000000000000500000000000000000000000000000000300000000 | |
| 256 \& 000000000 | |
| 257 \& | |
| 258 \& FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri | |
| 259 \& ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
| 260 \& 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 | |
| 261 \& 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0 | |
| 262 \& 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1 | |
| 263 \& 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1 | |
| 264 \& | |
| 265 \& FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri | |
| 266 \& ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0 | |
| 267 \& 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0 | |
| 268 \& 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
| 269 \& 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0 | |
| 270 \& 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... | |
| 271 .Ve | |
| 272 .SH "OPTIONS" | |
| 273 .IX Header "OPTIONS" | |
| 274 .IP "\fB\-\-AromaticityModel\fR \fIMDLAromaticityModel | TriposAromaticityModel | MMFFAromaticityModel | ChemAxonBasicAromaticityModel | ChemAxonGeneralAromaticityModel | DaylightAromaticityModel | MayaChemToolsAromaticityModel\fR" 4 | |
| 275 .IX Item "--AromaticityModel MDLAromaticityModel | TriposAromaticityModel | MMFFAromaticityModel | ChemAxonBasicAromaticityModel | ChemAxonGeneralAromaticityModel | DaylightAromaticityModel | MayaChemToolsAromaticityModel" | |
| 276 Specify aromaticity model to use during detection of aromaticity. Possible values in the current | |
| 277 release are: \fIMDLAromaticityModel, TriposAromaticityModel, MMFFAromaticityModel, | |
| 278 ChemAxonBasicAromaticityModel, ChemAxonGeneralAromaticityModel, DaylightAromaticityModel | |
| 279 or MayaChemToolsAromaticityModel\fR. Default value: \fIMayaChemToolsAromaticityModel\fR. | |
| 280 .Sp | |
| 281 The supported aromaticity model names along with model specific control parameters | |
| 282 are defined in \fBAromaticityModelsData.csv\fR, which is distributed with the current release | |
| 283 and is available under \fBlib/data\fR directory. \fBMolecule.pm\fR module retrieves data from | |
| 284 this file during class instantiation and makes it available to method \fBDetectAromaticity\fR | |
| 285 for detecting aromaticity corresponding to a specific model. | |
| 286 .IP "\fB\-\-BitsOrder\fR \fIAscending | Descending\fR" 4 | |
| 287 .IX Item "--BitsOrder Ascending | Descending" | |
| 288 Bits order to use during generation of fingerprints bit-vector string for \fIMACCSKeyBits\fR value of | |
| 289 \&\fB\-m, \-\-mode\fR option. Possible values: \fIAscending, Descending\fR. Default: \fIAscending\fR. | |
| 290 .Sp | |
| 291 \&\fIAscending\fR bit order corresponds to the first bit in each byte being the lowest bit, as | |
| 292 opposed to the highest bit. | |
| 293 .Sp | |
| 294 Internally, bits are stored in \fIAscending\fR order using Perl vec function. Regardless | |
| 295 of machine order, big-endian or little-endian, vec function always considers first | |
| 296 string byte as the lowest byte and first bit within each byte as the lowest bit. | |
| 297 .IP "\fB\-b, \-\-BitStringFormat\fR \fIBinaryString | HexadecimalString\fR" 4 | |
| 298 .IX Item "-b, --BitStringFormat BinaryString | HexadecimalString" | |
| 299 Format of fingerprints bit-vector string data in output \s-1SD\s0, \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) specified by | |
| 300 \&\fB\-\-output\fR used during \fIMACCSKeyBits\fR value of \fB\-m, \-\-mode\fR option. Possible | |
| 301 values: \fIBinaryString, HexadecimalString\fR. Default value: \fIBinaryString\fR. | |
| 302 .Sp | |
| 303 \&\fIBinaryString\fR corresponds to an \s-1ASCII\s0 string containing 1s and 0s. \fIHexadecimalString\fR | |
| 304 contains bit values in \s-1ASCII\s0 hexadecimal format. | |
| 305 .Sp | |
| 306 Examples: | |
| 307 .Sp | |
| 308 .Vb 4 | |
| 309 \& FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000 | |
| 310 \& 0000000000000000000000000000000001001000010010000000010010000000011100 | |
| 311 \& 0100101010111100011011000100110110000011011110100110111111111111011111 | |
| 312 \& 11111111111110111000 | |
| 313 \& | |
| 314 \& FingerprintsBitVector;MACCSKeyBits;166;HexadecimalString;Ascending;000 | |
| 315 \& 000000021210210e845f8d8c60b79dffbffffd1 | |
| 316 \& | |
| 317 \& FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011 | |
| 318 \& 1110011111100101111111000111101100110000000000000011100010000000000000 | |
| 319 \& 0000000000000000000000000000000000000000000000101000000000000000000000 | |
| 320 \& 0000000000000000000000000000000000000000000000000000000000000000000000 | |
| 321 \& 0000000000000000000000000000000000000011000000000000000000000000000000 | |
| 322 \& 0000000000000000000000000000000000000000 | |
| 323 \& | |
| 324 \& FingerprintsBitVector;MACCSKeyBits;322;HexadecimalString;Ascending;7d7 | |
| 325 \& e7af3edc000c1100000000000000500000000000000000000000000000000300000000 | |
| 326 \& 000000000 | |
| 327 .Ve | |
| 328 .IP "\fB\-\-CompoundID\fR \fIDataFieldName or LabelPrefixString\fR" 4 | |
| 329 .IX Item "--CompoundID DataFieldName or LabelPrefixString" | |
| 330 This value is \fB\-\-CompoundIDMode\fR specific and indicates how compound \s-1ID\s0 is generated. | |
| 331 .Sp | |
| 332 For \fIDataField\fR value of \fB\-\-CompoundIDMode\fR option, it corresponds to datafield label name | |
| 333 whose value is used as compound \s-1ID\s0; otherwise, it's a prefix string used for generating compound | |
| 334 IDs like LabelPrefixString<Number>. Default value, \fICmpd\fR, generates compound IDs which | |
| 335 look like Cmpd<Number>. | |
| 336 .Sp | |
| 337 Examples for \fIDataField\fR value of \fB\-\-CompoundIDMode\fR: | |
| 338 .Sp | |
| 339 .Vb 2 | |
| 340 \& MolID | |
| 341 \& ExtReg | |
| 342 .Ve | |
| 343 .Sp | |
| 344 Examples for \fILabelPrefix\fR or \fIMolNameOrLabelPrefix\fR value of \fB\-\-CompoundIDMode\fR: | |
| 345 .Sp | |
| 346 .Vb 1 | |
| 347 \& Compound | |
| 348 .Ve | |
| 349 .Sp | |
| 350 The value specified above generates compound IDs which correspond to Compound<Number> | |
| 351 instead of default value of Cmpd<Number>. | |
| 352 .IP "\fB\-\-CompoundIDLabel\fR \fItext\fR" 4 | |
| 353 .IX Item "--CompoundIDLabel text" | |
| 354 Specify compound \s-1ID\s0 column label for \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) used during \fICompoundID\fR value | |
| 355 of \fB\-\-DataFieldsMode\fR option. Default: \fICompoundID\fR. | |
| 356 .IP "\fB\-\-CompoundIDMode\fR \fIDataField | MolName | LabelPrefix | MolNameOrLabelPrefix\fR" 4 | |
| 357 .IX Item "--CompoundIDMode DataField | MolName | LabelPrefix | MolNameOrLabelPrefix" | |
| 358 Specify how to generate compound IDs and write to \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) along with generated | |
| 359 fingerprints for \fI\s-1FP\s0 | text | all\fR values of \fB\-\-output\fR option: use a \fISDFile(s)\fR datafield value; | |
| 360 use molname line from \fISDFile(s)\fR; generate a sequential \s-1ID\s0 with specific prefix; use combination | |
| 361 of both MolName and LabelPrefix with usage of LabelPrefix values for empty molname lines. | |
| 362 .Sp | |
| 363 Possible values: \fIDataField | MolName | LabelPrefix | MolNameOrLabelPrefix\fR. | |
| 364 Default: \fILabelPrefix\fR. | |
| 365 .Sp | |
| 366 For \fIMolNameOrLabelPrefix\fR value of \fB\-\-CompoundIDMode\fR, molname line in \fISDFile(s)\fR takes | |
| 367 precedence over sequential compound IDs generated using \fILabelPrefix\fR and only empty molname | |
| 368 values are replaced with sequential compound IDs. | |
| 369 .Sp | |
| 370 This is only used for \fICompoundID\fR value of \fB\-\-DataFieldsMode\fR option. | |
| 371 .ie n .IP "\fB\-\-DataFields\fR \fI""FieldLabel1,FieldLabel2,...""\fR" 4 | |
| 372 .el .IP "\fB\-\-DataFields\fR \fI``FieldLabel1,FieldLabel2,...''\fR" 4 | |
| 373 .IX Item "--DataFields FieldLabel1,FieldLabel2,..." | |
| 374 Comma delimited list of \fISDFile(s)\fR data fields to extract and write to \s-1CSV/TSV\s0 text file(s) along | |
| 375 with generated fingerprints for \fItext | all\fR values of \fB\-\-output\fR option. | |
| 376 .Sp | |
| 377 This is only used for \fISpecify\fR value of \fB\-\-DataFieldsMode\fR option. | |
| 378 .Sp | |
| 379 Examples: | |
| 380 .Sp | |
| 381 .Vb 2 | |
| 382 \& Extreg | |
| 383 \& MolID,CompoundName | |
| 384 .Ve | |
| 385 .IP "\fB\-d, \-\-DataFieldsMode\fR \fIAll | Common | Specify | CompoundID\fR" 4 | |
| 386 .IX Item "-d, --DataFieldsMode All | Common | Specify | CompoundID" | |
| 387 Specify how data fields in \fISDFile(s)\fR are transferred to output \s-1CSV/TSV\s0 text file(s) along | |
| 388 with generated fingerprints for \fItext | all\fR values of \fB\-\-output\fR option: transfer all \s-1SD\s0 | |
| 389 data fields; transfer \s-1SD\s0 data fields common to all compounds; extract specified data fields; | |
| 390 generate a compound \s-1ID\s0 using molname line, a compound prefix, or a combination of both. | |
| 391 Possible values: \fIAll | Common | Specify | CompoundID\fR. Default value: \fICompoundID\fR. | |
| 392 .IP "\fB\-f, \-\-Filter\fR \fIYes | No\fR" 4 | |
| 393 .IX Item "-f, --Filter Yes | No" | |
| 394 Specify whether to check and filter compound data in SDFile(s). Possible values: \fIYes or No\fR. | |
| 395 Default value: \fIYes\fR. | |
| 396 .Sp | |
| 397 By default, compound data is checked before calculating fingerprints and compounds containing | |
| 398 atom data corresponding to non-element symbols or no atom data are ignored. | |
| 399 .IP "\fB\-\-FingerprintsLabel\fR \fItext\fR" 4 | |
| 400 .IX Item "--FingerprintsLabel text" | |
| 401 \&\s-1SD\s0 data label or text file column label to use for fingerprints string in output \s-1SD\s0 or | |
| 402 \&\s-1CSV/TSV\s0 text file(s) specified by \fB\-\-output\fR. Default value: \fIMACCSKeysFingerprints\fR. | |
| 403 .IP "\fB\-h, \-\-help\fR" 4 | |
| 404 .IX Item "-h, --help" | |
| 405 Print this help message. | |
| 406 .IP "\fB\-k, \-\-KeepLargestComponent\fR \fIYes | No\fR" 4 | |
| 407 .IX Item "-k, --KeepLargestComponent Yes | No" | |
| 408 Generate fingerprints for only the largest component in molecule. Possible values: | |
| 409 \&\fIYes or No\fR. Default value: \fIYes\fR. | |
| 410 .Sp | |
| 411 For molecules containing multiple connected components, fingerprints can be generated | |
| 412 in two different ways: use all connected components or just the largest connected | |
| 413 component. By default, all atoms except for the largest connected component are | |
| 414 deleted before generation of fingerprints. | |
| 415 .IP "\fB\-m, \-\-mode\fR \fIMACCSKeyBits | MACCSKeyCount\fR" 4 | |
| 416 .IX Item "-m, --mode MACCSKeyBits | MACCSKeyCount" | |
| 417 Specify type of \s-1MACCS\s0 keys [ Ref 45\-47 ] fingerprints to generate for molecules in \fISDFile(s)\fR. | |
| 418 Possible values: \fIMACCSKeyBits, MACCSKeyCount\fR. Default value: \fIMACCSKeyBits\fR. | |
| 419 .Sp | |
| 420 For \fIMACCSKeyBits\fR value of \fB\-m, \-\-mode\fR option, a fingerprint bit-vector string containing | |
| 421 zeros and ones is generated and for \fIMACCSKeyCount\fR value, a fingerprint vector string | |
| 422 corresponding to number of \s-1MACCS\s0 keys is generated. | |
| 423 .Sp | |
| 424 \&\fIMACCSKeyBits | MACCSKeyCount\fR values for \fB\-m, \-\-mode\fR option along with two possible | |
| 425 \&\fI166 | 322\fR values of \fB\-s, \-\-size\fR support generation of four different types of \s-1MACCS\s0 | |
| 426 keys fingerprint: \fIMACCS166KeyBits, MACCS166KeyCount, MACCS322KeyBits, MACCS322KeyCount\fR. | |
| 427 .Sp | |
| 428 Definition of \s-1MACCS\s0 keys uses the following atom and bond symbols to define atom and | |
| 429 bond environments: | |
| 430 .Sp | |
| 431 .Vb 1 | |
| 432 \& Atom symbols for 166 keys [ Ref 47 ]: | |
| 433 \& | |
| 434 \& A : Any valid periodic table element symbol | |
| 435 \& Q : Hetero atoms; any non\-C or non\-H atom | |
| 436 \& X : Halogens; F, Cl, Br, I | |
| 437 \& Z : Others; other than H, C, N, O, Si, P, S, F, Cl, Br, I | |
| 438 \& | |
| 439 \& Atom symbols for 322 keys [ Ref 46 ]: | |
| 440 \& | |
| 441 \& A : Any valid periodic table element symbol | |
| 442 \& Q : Hetero atoms; any non\-C or non\-H atom | |
| 443 \& X : Others; other than H, C, N, O, Si, P, S, F, Cl, Br, I | |
| 444 \& Z is neither defined nor used | |
| 445 \& | |
| 446 \& Bond types: | |
| 447 \& | |
| 448 \& \- : Single | |
| 449 \& = : Double | |
| 450 \& T : Triple | |
| 451 \& # : Triple | |
| 452 \& ~ : Single or double query bond | |
| 453 \& % : An aromatic query bond | |
| 454 \& | |
| 455 \& None : Any bond type; no explicit bond specified | |
| 456 \& | |
| 457 \& $ : Ring bond; $ before a bond type specifies ring bond | |
| 458 \& ! : Chain or non\-ring bond; ! before a bond type specifies chain bond | |
| 459 \& | |
| 460 \& @ : A ring linkage and the number following it specifies the | |
| 461 \& atom\*(Aqs position in the line, thus @1 means linked back to the first | |
| 462 \& atom in the list. | |
| 463 \& | |
| 464 \& Aromatic: Kekule or Arom5 | |
| 465 \& | |
| 466 \& Kekule: Bonds in 6\-membered rings with alternate single/double bonds | |
| 467 \& or perimeter bonds | |
| 468 \& Arom5: Bonds in 5\-membered rings with two double bonds and a hetero | |
| 469 \& atom at the apex of the ring. | |
| 470 .Ve | |
| 471 .Sp | |
| 472 \&\s-1MACCS\s0 166 keys [ Ref 45\-47 ] are defined as follows: | |
| 473 .Sp | |
| 474 .Vb 1 | |
| 475 \& Key Description | |
| 476 \& | |
| 477 \& 1 ISOTOPE | |
| 478 \& 2 103 < ATOMIC NO. < 256 | |
| 479 \& 3 GROUP IVA,VA,VIA PERIODS 4\-6 (Ge...) | |
| 480 \& 4 ACTINIDE | |
| 481 \& 5 GROUP IIIB,IVB (Sc...) | |
| 482 \& 6 LANTHANIDE | |
| 483 \& 7 GROUP VB,VIB,VIIB (V...) | |
| 484 \& 8 QAAA@1 | |
| 485 \& 9 GROUP VIII (Fe...) | |
| 486 \& 10 GROUP IIA (ALKALINE EARTH) | |
| 487 \& 11 4M RING | |
| 488 \& 12 GROUP IB,IIB (Cu...) | |
| 489 \& 13 ON(C)C | |
| 490 \& 14 S\-S | |
| 491 \& 15 OC(O)O | |
| 492 \& 16 QAA@1 | |
| 493 \& 17 CTC | |
| 494 \& 18 GROUP IIIA (B...) | |
| 495 \& 19 7M RING | |
| 496 \& 20 SI | |
| 497 \& 21 C=C(Q)Q | |
| 498 \& 22 3M RING | |
| 499 \& 23 NC(O)O | |
| 500 \& 24 N\-O | |
| 501 \& 25 NC(N)N | |
| 502 \& 26 C$=C($A)$A | |
| 503 \& 27 I | |
| 504 \& 28 QCH2Q | |
| 505 \& 29 P | |
| 506 \& 30 CQ(C)(C)A | |
| 507 \& 31 QX | |
| 508 \& 32 CSN | |
| 509 \& 33 NS | |
| 510 \& 34 CH2=A | |
| 511 \& 35 GROUP IA (ALKALI METAL) | |
| 512 \& 36 S HETEROCYCLE | |
| 513 \& 37 NC(O)N | |
| 514 \& 38 NC(C)N | |
| 515 \& 39 OS(O)O | |
| 516 \& 40 S\-O | |
| 517 \& 41 CTN | |
| 518 \& 42 F | |
| 519 \& 43 QHAQH | |
| 520 \& 44 OTHER | |
| 521 \& 45 C=CN | |
| 522 \& 46 BR | |
| 523 \& 47 SAN | |
| 524 \& 48 OQ(O)O | |
| 525 \& 49 CHARGE | |
| 526 \& 50 C=C(C)C | |
| 527 \& 51 CSO | |
| 528 \& 52 NN | |
| 529 \& 53 QHAAAQH | |
| 530 \& 54 QHAAQH | |
| 531 \& 55 OSO | |
| 532 \& 56 ON(O)C | |
| 533 \& 57 O HETEROCYCLE | |
| 534 \& 58 QSQ | |
| 535 \& 59 Snot%A%A | |
| 536 \& 60 S=O | |
| 537 \& 61 AS(A)A | |
| 538 \& 62 A$A!A$A | |
| 539 \& 63 N=O | |
| 540 \& 64 A$A!S | |
| 541 \& 65 C%N | |
| 542 \& 66 CC(C)(C)A | |
| 543 \& 67 QS | |
| 544 \& 68 QHQH (&...) | |
| 545 \& 69 QQH | |
| 546 \& 70 QNQ | |
| 547 \& 71 NO | |
| 548 \& 72 OAAO | |
| 549 \& 73 S=A | |
| 550 \& 74 CH3ACH3 | |
| 551 \& 75 A!N$A | |
| 552 \& 76 C=C(A)A | |
| 553 \& 77 NAN | |
| 554 \& 78 C=N | |
| 555 \& 79 NAAN | |
| 556 \& 80 NAAAN | |
| 557 \& 81 SA(A)A | |
| 558 \& 82 ACH2QH | |
| 559 \& 83 QAAAA@1 | |
| 560 \& 84 NH2 | |
| 561 \& 85 CN(C)C | |
| 562 \& 86 CH2QCH2 | |
| 563 \& 87 X!A$A | |
| 564 \& 88 S | |
| 565 \& 89 OAAAO | |
| 566 \& 90 QHAACH2A | |
| 567 \& 91 QHAAACH2A | |
| 568 \& 92 OC(N)C | |
| 569 \& 93 QCH3 | |
| 570 \& 94 QN | |
| 571 \& 95 NAAO | |
| 572 \& 96 5M RING | |
| 573 \& 97 NAAAO | |
| 574 \& 98 QAAAAA@1 | |
| 575 \& 99 C=C | |
| 576 \& 100 ACH2N | |
| 577 \& 101 8M RING | |
| 578 \& 102 QO | |
| 579 \& 103 CL | |
| 580 \& 104 QHACH2A | |
| 581 \& 105 A$A($A)$A | |
| 582 \& 106 QA(Q)Q | |
| 583 \& 107 XA(A)A | |
| 584 \& 108 CH3AAACH2A | |
| 585 \& 109 ACH2O | |
| 586 \& 110 NCO | |
| 587 \& 111 NACH2A | |
| 588 \& 112 AA(A)(A)A | |
| 589 \& 113 Onot%A%A | |
| 590 \& 114 CH3CH2A | |
| 591 \& 115 CH3ACH2A | |
| 592 \& 116 CH3AACH2A | |
| 593 \& 117 NAO | |
| 594 \& 118 ACH2CH2A > 1 | |
| 595 \& 119 N=A | |
| 596 \& 120 HETEROCYCLIC ATOM > 1 (&...) | |
| 597 \& 121 N HETEROCYCLE | |
| 598 \& 122 AN(A)A | |
| 599 \& 123 OCO | |
| 600 \& 124 QQ | |
| 601 \& 125 AROMATIC RING > 1 | |
| 602 \& 126 A!O!A | |
| 603 \& 127 A$A!O > 1 (&...) | |
| 604 \& 128 ACH2AAACH2A | |
| 605 \& 129 ACH2AACH2A | |
| 606 \& 130 QQ > 1 (&...) | |
| 607 \& 131 QH > 1 | |
| 608 \& 132 OACH2A | |
| 609 \& 133 A$A!N | |
| 610 \& 134 X (HALOGEN) | |
| 611 \& 135 Nnot%A%A | |
| 612 \& 136 O=A > 1 | |
| 613 \& 137 HETEROCYCLE | |
| 614 \& 138 QCH2A > 1 (&...) | |
| 615 \& 139 OH | |
| 616 \& 140 O > 3 (&...) | |
| 617 \& 141 CH3 > 2 (&...) | |
| 618 \& 142 N > 1 | |
| 619 \& 143 A$A!O | |
| 620 \& 144 Anot%A%Anot%A | |
| 621 \& 145 6M RING > 1 | |
| 622 \& 146 O > 2 | |
| 623 \& 147 ACH2CH2A | |
| 624 \& 148 AQ(A)A | |
| 625 \& 149 CH3 > 1 | |
| 626 \& 150 A!A$A!A | |
| 627 \& 151 NH | |
| 628 \& 152 OC(C)C | |
| 629 \& 153 QCH2A | |
| 630 \& 154 C=O | |
| 631 \& 155 A!CH2!A | |
| 632 \& 156 NA(A)A | |
| 633 \& 157 C\-O | |
| 634 \& 158 C\-N | |
| 635 \& 159 O > 1 | |
| 636 \& 160 CH3 | |
| 637 \& 161 N | |
| 638 \& 162 AROMATIC | |
| 639 \& 163 6M RING | |
| 640 \& 164 O | |
| 641 \& 165 RING | |
| 642 \& 166 FRAGMENTS | |
| 643 .Ve | |
| 644 .Sp | |
| 645 \&\s-1MACCS\s0 322 keys set as defined in tables 1, 2 and 3 [ Ref 46 ] include: | |
| 646 .Sp | |
| 647 .Vb 3 | |
| 648 \& . 26 atom properties of type P, as listed in Table 1 | |
| 649 \& . 32 one\-atom environments, as listed in Table 2 | |
| 650 \& . 264 atom\-bond\-atom combinations listed in Table 3 | |
| 651 .Ve | |
| 652 .Sp | |
| 653 Total number of keys in three tables is : 322 | |
| 654 .Sp | |
| 655 Atom symbol, X, used for 322 keys [ Ref 46 ] doesn't refer to Halogens as it does for 166 keys. In | |
| 656 order to keep the definition of 322 keys consistent with the published definitions, the symbol X is | |
| 657 used to imply \*(L"others\*(R" atoms, but it's internally mapped to symbol Z as defined for 166 keys | |
| 658 during the generation of key values. | |
| 659 .Sp | |
| 660 Atom properties-based keys (26): | |
| 661 .Sp | |
| 662 .Vb 10 | |
| 663 \& Key Description | |
| 664 \& 1 A(AAA) or AA(A)A \- atom with at least three neighbors | |
| 665 \& 2 Q \- heteroatom | |
| 666 \& 3 Anot%not\-A \- atom involved in one or more multiple bonds, not aromatic | |
| 667 \& 4 A(AAAA) or AA(A)(A)A \- atom with at least four neighbors | |
| 668 \& 5 A(QQ) or QA(Q) \- atom with at least two heteroatom neighbors | |
| 669 \& 6 A(QQQ) or QA(Q)Q \- atom with at least three heteroatom neighbors | |
| 670 \& 7 QH \- heteroatom with at least one hydrogen attached | |
| 671 \& 8 CH2(AA) or ACH2A \- carbon with at least two single bonds and at least | |
| 672 \& two hydrogens attached | |
| 673 \& 9 CH3(A) or ACH3 \- carbon with at least one single bond and at least three | |
| 674 \& hydrogens attached | |
| 675 \& 10 Halogen | |
| 676 \& 11 A(\-A\-A\-A) or A\-A(\-A)\-A \- atom has at least three single bonds | |
| 677 \& 12 AAAAAA@1 > 2 \- atom is in at least two different six\-membered rings | |
| 678 \& 13 A($A$A$A) or A$A($A)$A \- atom has more than two ring bonds | |
| 679 \& 14 A$A!A$A \- atom is at a ring/chain boundary. When a comparison is done | |
| 680 \& with another atom the path passes through the chain bond. | |
| 681 \& 15 Anot%A%Anot%A \- atom is at an aromatic/nonaromatic boundary. When a | |
| 682 \& comparison is done with another atom the path | |
| 683 \& passes through the aromatic bond. | |
| 684 \& 16 A!A!A \- atom with more than one chain bond | |
| 685 \& 17 A!A$A!A \- atom is at a ring/chain boundary. When a comparison is done | |
| 686 \& with another atom the path passes through the ring bond. | |
| 687 \& 18 A%Anot%A%A \- atom is at an aromatic/nonaromatic boundary. When a | |
| 688 \& comparison is done with another atom the | |
| 689 \& path passes through the nonaromatic bond. | |
| 690 \& 19 HETEROCYCLE \- atom is a heteroatom in a ring. | |
| 691 \& 20 rare properties: atom with five or more neighbors, atom in | |
| 692 \& four or more rings, or atom types other than | |
| 693 \& H, C, N, O, S, F, Cl, Br, or I | |
| 694 \& 21 rare properties: atom has a charge, is an isotope, has two or | |
| 695 \& more multiple bonds, or has a triple bond. | |
| 696 \& 22 N \- nitrogen | |
| 697 \& 23 S \- sulfur | |
| 698 \& 24 O \- oxygen | |
| 699 \& 25 A(AA)A(A)A(AA) \- atom has two neighbors, each with three or | |
| 700 \& more neighbors (including the central atom). | |
| 701 \& 26 CHACH2 \- atom has two hydrocarbon (CH2) neighbors | |
| 702 .Ve | |
| 703 .Sp | |
| 704 Atomic environments properties-based keys (32): | |
| 705 .Sp | |
| 706 .Vb 10 | |
| 707 \& Key Description | |
| 708 \& 27 C(CC) | |
| 709 \& 28 C(CCC) | |
| 710 \& 29 C(CN) | |
| 711 \& 30 C(CCN) | |
| 712 \& 31 C(NN) | |
| 713 \& 32 C(NNC) | |
| 714 \& 33 C(NNN) | |
| 715 \& 34 C(CO) | |
| 716 \& 35 C(CCO) | |
| 717 \& 36 C(NO) | |
| 718 \& 37 C(NCO) | |
| 719 \& 38 C(NNO) | |
| 720 \& 39 C(OO) | |
| 721 \& 40 C(COO) | |
| 722 \& 41 C(NOO) | |
| 723 \& 42 C(OOO) | |
| 724 \& 43 Q(CC) | |
| 725 \& 44 Q(CCC) | |
| 726 \& 45 Q(CN) | |
| 727 \& 46 Q(CCN) | |
| 728 \& 47 Q(NN) | |
| 729 \& 48 Q(CNN) | |
| 730 \& 49 Q(NNN) | |
| 731 \& 50 Q(CO) | |
| 732 \& 51 Q(CCO) | |
| 733 \& 52 Q(NO) | |
| 734 \& 53 Q(CNO) | |
| 735 \& 54 Q(NNO) | |
| 736 \& 55 Q(OO) | |
| 737 \& 56 Q(COO) | |
| 738 \& 57 Q(NOO) | |
| 739 \& 58 Q(OOO) | |
| 740 .Ve | |
| 741 .Sp | |
| 742 Note: The first symbol is the central atom, with atoms bonded to the central atom listed in | |
| 743 parentheses. Q is any non-C, non-H atom. If only two atoms are in parentheses, there is | |
| 744 no implication concerning the other atoms bonded to the central atom. | |
| 745 .Sp | |
| 746 Atom-Bond-Atom properties-based keys (264): | |
| 747 .Sp | |
| 748 .Vb 10 | |
| 749 \& Key Description | |
| 750 \& 59 C\-C | |
| 751 \& 60 C\-N | |
| 752 \& 61 C\-O | |
| 753 \& 62 C\-S | |
| 754 \& 63 C\-Cl | |
| 755 \& 64 C\-P | |
| 756 \& 65 C\-F | |
| 757 \& 66 C\-Br | |
| 758 \& 67 C\-Si | |
| 759 \& 68 C\-I | |
| 760 \& 69 C\-X | |
| 761 \& 70 N\-N | |
| 762 \& 71 N\-O | |
| 763 \& 72 N\-S | |
| 764 \& 73 N\-Cl | |
| 765 \& 74 N\-P | |
| 766 \& 75 N\-F | |
| 767 \& 76 N\-Br | |
| 768 \& 77 N\-Si | |
| 769 \& 78 N\-I | |
| 770 \& 79 N\-X | |
| 771 \& 80 O\-O | |
| 772 \& 81 O\-S | |
| 773 \& 82 O\-Cl | |
| 774 \& 83 O\-P | |
| 775 \& 84 O\-F | |
| 776 \& 85 O\-Br | |
| 777 \& 86 O\-Si | |
| 778 \& 87 O\-I | |
| 779 \& 88 O\-X | |
| 780 \& 89 S\-S | |
| 781 \& 90 S\-Cl | |
| 782 \& 91 S\-P | |
| 783 \& 92 S\-F | |
| 784 \& 93 S\-Br | |
| 785 \& 94 S\-Si | |
| 786 \& 95 S\-I | |
| 787 \& 96 S\-X | |
| 788 \& 97 Cl\-Cl | |
| 789 \& 98 Cl\-P | |
| 790 \& 99 Cl\-F | |
| 791 \& 100 Cl\-Br | |
| 792 \& 101 Cl\-Si | |
| 793 \& 102 Cl\-I | |
| 794 \& 103 Cl\-X | |
| 795 \& 104 P\-P | |
| 796 \& 105 P\-F | |
| 797 \& 106 P\-Br | |
| 798 \& 107 P\-Si | |
| 799 \& 108 P\-I | |
| 800 \& 109 P\-X | |
| 801 \& 110 F\-F | |
| 802 \& 111 F\-Br | |
| 803 \& 112 F\-Si | |
| 804 \& 113 F\-I | |
| 805 \& 114 F\-X | |
| 806 \& 115 Br\-Br | |
| 807 \& 116 Br\-Si | |
| 808 \& 117 Br\-I | |
| 809 \& 118 Br\-X | |
| 810 \& 119 Si\-Si | |
| 811 \& 120 Si\-I | |
| 812 \& 121 Si\-X | |
| 813 \& 122 I\-I | |
| 814 \& 123 I\-X | |
| 815 \& 124 X\-X | |
| 816 \& 125 C=C | |
| 817 \& 126 C=N | |
| 818 \& 127 C=O | |
| 819 \& 128 C=S | |
| 820 \& 129 C=Cl | |
| 821 \& 130 C=P | |
| 822 \& 131 C=F | |
| 823 \& 132 C=Br | |
| 824 \& 133 C=Si | |
| 825 \& 134 C=I | |
| 826 \& 135 C=X | |
| 827 \& 136 N=N | |
| 828 \& 137 N=O | |
| 829 \& 138 N=S | |
| 830 \& 139 N=Cl | |
| 831 \& 140 N=P | |
| 832 \& 141 N=F | |
| 833 \& 142 N=Br | |
| 834 \& 143 N=Si | |
| 835 \& 144 N=I | |
| 836 \& 145 N=X | |
| 837 \& 146 O=O | |
| 838 \& 147 O=S | |
| 839 \& 148 O=Cl | |
| 840 \& 149 O=P | |
| 841 \& 150 O=F | |
| 842 \& 151 O=Br | |
| 843 \& 152 O=Si | |
| 844 \& 153 O=I | |
| 845 \& 154 O=X | |
| 846 \& 155 S=S | |
| 847 \& 156 S=Cl | |
| 848 \& 157 S=P | |
| 849 \& 158 S=F | |
| 850 \& 159 S=Br | |
| 851 \& 160 S=Si | |
| 852 \& 161 S=I | |
| 853 \& 162 S=X | |
| 854 \& 163 Cl=Cl | |
| 855 \& 164 Cl=P | |
| 856 \& 165 Cl=F | |
| 857 \& 166 Cl=Br | |
| 858 \& 167 Cl=Si | |
| 859 \& 168 Cl=I | |
| 860 \& 169 Cl=X | |
| 861 \& 170 P=P | |
| 862 \& 171 P=F | |
| 863 \& 172 P=Br | |
| 864 \& 173 P=Si | |
| 865 \& 174 P=I | |
| 866 \& 175 P=X | |
| 867 \& 176 F=F | |
| 868 \& 177 F=Br | |
| 869 \& 178 F=Si | |
| 870 \& 179 F=I | |
| 871 \& 180 F=X | |
| 872 \& 181 Br=Br | |
| 873 \& 182 Br=Si | |
| 874 \& 183 Br=I | |
| 875 \& 184 Br=X | |
| 876 \& 185 Si=Si | |
| 877 \& 186 Si=I | |
| 878 \& 187 Si=X | |
| 879 \& 188 I=I | |
| 880 \& 189 I=X | |
| 881 \& 190 X=X | |
| 882 \& 191 C#C | |
| 883 \& 192 C#N | |
| 884 \& 193 C#O | |
| 885 \& 194 C#S | |
| 886 \& 195 C#Cl | |
| 887 \& 196 C#P | |
| 888 \& 197 C#F | |
| 889 \& 198 C#Br | |
| 890 \& 199 C#Si | |
| 891 \& 200 C#I | |
| 892 \& 201 C#X | |
| 893 \& 202 N#N | |
| 894 \& 203 N#O | |
| 895 \& 204 N#S | |
| 896 \& 205 N#Cl | |
| 897 \& 206 N#P | |
| 898 \& 207 N#F | |
| 899 \& 208 N#Br | |
| 900 \& 209 N#Si | |
| 901 \& 210 N#I | |
| 902 \& 211 N#X | |
| 903 \& 212 O#O | |
| 904 \& 213 O#S | |
| 905 \& 214 O#Cl | |
| 906 \& 215 O#P | |
| 907 \& 216 O#F | |
| 908 \& 217 O#Br | |
| 909 \& 218 O#Si | |
| 910 \& 219 O#I | |
| 911 \& 220 O#X | |
| 912 \& 221 S#S | |
| 913 \& 222 S#Cl | |
| 914 \& 223 S#P | |
| 915 \& 224 S#F | |
| 916 \& 225 S#Br | |
| 917 \& 226 S#Si | |
| 918 \& 227 S#I | |
| 919 \& 228 S#X | |
| 920 \& 229 Cl#Cl | |
| 921 \& 230 Cl#P | |
| 922 \& 231 Cl#F | |
| 923 \& 232 Cl#Br | |
| 924 \& 233 Cl#Si | |
| 925 \& 234 Cl#I | |
| 926 \& 235 Cl#X | |
| 927 \& 236 P#P | |
| 928 \& 237 P#F | |
| 929 \& 238 P#Br | |
| 930 \& 239 P#Si | |
| 931 \& 240 P#I | |
| 932 \& 241 P#X | |
| 933 \& 242 F#F | |
| 934 \& 243 F#Br | |
| 935 \& 244 F#Si | |
| 936 \& 245 F#I | |
| 937 \& 246 F#X | |
| 938 \& 247 Br#Br | |
| 939 \& 248 Br#Si | |
| 940 \& 249 Br#I | |
| 941 \& 250 Br#X | |
| 942 \& 251 Si#Si | |
| 943 \& 252 Si#I | |
| 944 \& 253 Si#X | |
| 945 \& 254 I#I | |
| 946 \& 255 I#X | |
| 947 \& 256 X#X | |
| 948 \& 257 C$C | |
| 949 \& 258 C$N | |
| 950 \& 259 C$O | |
| 951 \& 260 C$S | |
| 952 \& 261 C$Cl | |
| 953 \& 262 C$P | |
| 954 \& 263 C$F | |
| 955 \& 264 C$Br | |
| 956 \& 265 C$Si | |
| 957 \& 266 C$I | |
| 958 \& 267 C$X | |
| 959 \& 268 N$N | |
| 960 \& 269 N$O | |
| 961 \& 270 N$S | |
| 962 \& 271 N$Cl | |
| 963 \& 272 N$P | |
| 964 \& 273 N$F | |
| 965 \& 274 N$Br | |
| 966 \& 275 N$Si | |
| 967 \& 276 N$I | |
| 968 \& 277 N$X | |
| 969 \& 278 O$O | |
| 970 \& 279 O$S | |
| 971 \& 280 O$Cl | |
| 972 \& 281 O$P | |
| 973 \& 282 O$F | |
| 974 \& 283 O$Br | |
| 975 \& 284 O$Si | |
| 976 \& 285 O$I | |
| 977 \& 286 O$X | |
| 978 \& 287 S$S | |
| 979 \& 288 S$Cl | |
| 980 \& 289 S$P | |
| 981 \& 290 S$F | |
| 982 \& 291 S$Br | |
| 983 \& 292 S$Si | |
| 984 \& 293 S$I | |
| 985 \& 294 S$X | |
| 986 \& 295 Cl$Cl | |
| 987 \& 296 Cl$P | |
| 988 \& 297 Cl$F | |
| 989 \& 298 Cl$Br | |
| 990 \& 299 Cl$Si | |
| 991 \& 300 Cl$I | |
| 992 \& 301 Cl$X | |
| 993 \& 302 P$P | |
| 994 \& 303 P$F | |
| 995 \& 304 P$Br | |
| 996 \& 305 P$Si | |
| 997 \& 306 P$I | |
| 998 \& 307 P$X | |
| 999 \& 308 F$F | |
| 1000 \& 309 F$Br | |
| 1001 \& 310 F$Si | |
| 1002 \& 311 F$I | |
| 1003 \& 312 F$X | |
| 1004 \& 313 Br$Br | |
| 1005 \& 314 Br$Si | |
| 1006 \& 315 Br$I | |
| 1007 \& 316 Br$X | |
| 1008 \& 317 Si$Si | |
| 1009 \& 318 Si$I | |
| 1010 \& 319 Si$X | |
| 1011 \& 320 I$I | |
| 1012 \& 321 I$X | |
| 1013 \& 322 X$X | |
| 1014 .Ve | |
| 1015 .IP "\fB\-\-OutDelim\fR \fIcomma | tab | semicolon\fR" 4 | |
| 1016 .IX Item "--OutDelim comma | tab | semicolon" | |
| 1017 Delimiter for output \s-1CSV/TSV\s0 text file(s). Possible values: \fIcomma, tab, or semicolon\fR. | |
| 1018 Default value: \fIcomma\fR. | |
| 1019 .IP "\fB\-\-output\fR \fI\s-1SD\s0 | \s-1FP\s0 | text | all\fR" 4 | |
| 1020 .IX Item "--output SD | FP | text | all" | |
| 1021 Type of output files to generate. Possible values: \fI\s-1SD\s0, \s-1FP\s0, text, or all\fR. Default value: \fItext\fR. | |
| 1022 .IP "\fB\-o, \-\-overwrite\fR" 4 | |
| 1023 .IX Item "-o, --overwrite" | |
| 1024 Overwrite existing files. | |
| 1025 .IP "\fB\-q, \-\-quote\fR \fIYes | No\fR" 4 | |
| 1026 .IX Item "-q, --quote Yes | No" | |
| 1027 Put quotes around column values in output \s-1CSV/TSV\s0 text file(s). Possible values: | |
| 1028 \&\fIYes or No\fR. Default value: \fIYes\fR. | |
| 1029 .IP "\fB\-r, \-\-root\fR \fIRootName\fR" 4 | |
| 1030 .IX Item "-r, --root RootName" | |
| 1031 New file name is generated using the root: <Root>.<Ext>. Default for new file | |
| 1032 names: <SDFileName><MACCSKeysFP>.<Ext>. The file type determines <Ext> value. | |
| 1033 The sdf, fpf, csv, and tsv <Ext> values are used for \s-1SD\s0, \s-1FP\s0, comma/semicolon, and tab | |
| 1034 delimited text files, respectively. This option is ignored for multiple input files. | |
| 1035 .IP "\fB\-s, \-\-size\fR \fInumber\fR" 4 | |
| 1036 .IX Item "-s, --size number" | |
| 1037 Size of \s-1MACCS\s0 keys [ Ref 45\-47 ] set to use during fingerprints generation. Possible values: \fI166 or 322\fR. | |
| 1038 Default value: \fI166\fR. | |
| 1039 .IP "\fB\-v, \-\-VectorStringFormat\fR \fIValuesString | IDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString\fR" 4 | |
| 1040 .IX Item "-v, --VectorStringFormat ValuesString | IDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString" | |
| 1041 Format of fingerprints vector string data in output \s-1SD\s0, \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) specified by | |
| 1042 \&\fB\-\-output\fR used during \fIMACCSKeyCount\fR value of \fB\-m, \-\-mode\fR option. Possible | |
| 1043 values: \fIValuesString, IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, or | |
| 1044 ValuesAndIDsPairsString\fR. Default value: \fIValuesString\fR. | |
| 1045 .Sp | |
| 1046 Examples: | |
| 1047 .Sp | |
| 1048 .Vb 6 | |
| 1049 \& FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri | |
| 1050 \& ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
| 1051 \& 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 | |
| 1052 \& 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0 | |
| 1053 \& 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1 | |
| 1054 \& 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1 | |
| 1055 \& | |
| 1056 \& FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri | |
| 1057 \& ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0 | |
| 1058 \& 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0 | |
| 1059 \& 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 | |
| 1060 \& 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0 | |
| 1061 \& 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... | |
| 1062 .Ve | |
| 1063 .IP "\fB\-w, \-\-WorkingDir\fR \fIDirName\fR" 4 | |
| 1064 .IX Item "-w, --WorkingDir DirName" | |
| 1065 Location of working directory. Default: current directory. | |
| 1066 .SH "EXAMPLES" | |
| 1067 .IX Header "EXAMPLES" | |
| 1068 To generate \s-1MACCS\s0 keys fingerprints of size 166 in binary bit-vector string format | |
| 1069 and create a SampleMACCS166FPBin.csv file containing sequential compound IDs along with | |
| 1070 fingerprints bit-vector strings data, type: | |
| 1071 .PP | |
| 1072 .Vb 1 | |
| 1073 \& % MACCSKeysFingerprints.pl \-r SampleMACCS166FPBin \-o Sample.sdf | |
| 1074 .Ve | |
| 1075 .PP | |
| 1076 To generate \s-1MACCS\s0 keys fingerprints of size 166 in binary bit-vector string format | |
| 1077 and create SampleMACCS166FPBin.sdf, SampleMACCS166FPBin.fpf and SampleMACCS166FPBin.csv | |
| 1078 files containing sequential compound IDs in \s-1CSV\s0 file along with fingerprints bit-vector strings data, type: | |
| 1079 .PP | |
| 1080 .Vb 2 | |
| 1081 \& % MACCSKeysFingerprints.pl \-\-output all \-r SampleMACCS166FPBin | |
| 1082 \& \-o Sample.sdf | |
| 1083 .Ve | |
| 1084 .PP | |
| 1085 To generate \s-1MACCS\s0 keys fingerprints of size 322 in binary bit-vector string format | |
| 1086 and create a SampleMACCS322FPBin.csv file containing sequential compound IDs along with | |
| 1087 fingerprints bit-vector strings data, type: | |
| 1088 .PP | |
| 1089 .Vb 1 | |
| 1090 \& % MACCSKeysFingerprints.pl \-\-size 322 \-r SampleMACCS322FPBin \-o Sample.sdf | |
| 1091 .Ve | |
| 1092 .PP | |
| 1093 To generate \s-1MACCS\s0 keys fingerprints of size 166 corresponding to count of keys in | |
| 1094 ValuesString format and create a SampleMACCS166FPCount.csv file containing sequential | |
| 1095 compound IDs along with fingerprints vector strings data, type: | |
| 1096 .PP | |
| 1097 .Vb 2 | |
| 1098 \& % MACCSKeysFingerprints.pl \-m MACCSKeyCount \-r SampleMACCS166FPCount | |
| 1099 \& \-o Sample.sdf | |
| 1100 .Ve | |
| 1101 .PP | |
| 1102 To generate \s-1MACCS\s0 keys fingerprints of size 322 corresponding to count of keys in | |
| 1103 ValuesString format and create a SampleMACCS322FPCount.csv file containing sequential | |
| 1104 compound IDs along with fingerprints vector strings data, type: | |
| 1105 .PP | |
| 1106 .Vb 2 | |
| 1107 \& % MACCSKeysFingerprints.pl \-m MACCSKeyCount \-\-size 322 | |
| 1108 \& \-r SampleMACCS322FPCount \-o Sample.sdf | |
| 1109 .Ve | |
| 1110 .PP | |
| 1111 To generate \s-1MACCS\s0 keys fingerprints of size 166 in hexadecimal bit-vector string format with | |
| 1112 ascending bits order and create a SampleMACCS166FPHex.csv file containing compound IDs | |
| 1113 from MolName along with fingerprints bit-vector strings data, type: | |
| 1114 .PP | |
| 1115 .Vb 3 | |
| 1116 \& % MACCSKeysFingerprints.pl \-m MACCSKeyBits \-\-size 166 \-\-BitStringFormat | |
| 1117 \& HexadecimalString \-\-BitsOrder Ascending \-\-DataFieldsMode CompoundID | |
| 1118 \& \-\-CompoundIDMode MolName \-r SampleMACCS166FPHex \-o Sample.sdf | |
| 1119 .Ve | |
| 1120 .PP | |
| 1121 To generate \s-1MACCS\s0 keys fingerprints of size 166 corresponding to count of keys in | |
| 1122 IDsAndValuesString format and create a SampleMACCS166FPCount.csv file containing | |
| 1123 compound IDs from MolName line along with fingerprints vector strings data, type: | |
| 1124 .PP | |
| 1125 .Vb 3 | |
| 1126 \& % MACCSKeysFingerprints.pl \-m MACCSKeyCount \-\-size 166 | |
| 1127 \& \-\-VectorStringFormat IDsAndValuesString \-\-DataFieldsMode CompoundID | |
| 1128 \& \-\-CompoundIDMode MolName \-r SampleMACCS166FPCount \-o Sample.sdf | |
| 1129 .Ve | |
| 1130 .PP | |
| 1131 To generate \s-1MACCS\s0 keys fingerprints of size 166 corresponding to count of keys in | |
| 1132 IDsAndValuesString format and create a SampleMACCS166FPCount.csv file containing | |
| 1133 compound IDs using specified data field along with fingerprints vector strings data, type: | |
| 1134 .PP | |
| 1135 .Vb 4 | |
| 1136 \& % MACCSKeysFingerprints.pl \-m MACCSKeyCount \-\-size 166 | |
| 1137 \& \-\-VectorStringFormat IDsAndValuesString \-\-DataFieldsMode CompoundID | |
| 1138 \& \-\-CompoundIDMode DataField \-\-CompoundID Mol_ID \-r | |
| 1139 \& SampleMACCS166FPCount \-o Sample.sdf | |
| 1140 .Ve | |
| 1141 .PP | |
| 1142 To generate \s-1MACCS\s0 keys fingerprints of size 322 corresponding to count of keys in | |
| 1143 ValuesString format and create a SampleMACCS322FPCount.tsv file containing compound | |
| 1144 IDs derived from combination of molecule name line and an explicit compound prefix | |
| 1145 along with fingerprints vector strings data in a column labeled MACCSKeyCountFP, type: | |
| 1146 .PP | |
| 1147 .Vb 4 | |
| 1148 \& % MACCSKeysFingerprints.pl \-m MACCSKeyCount \-\-size 322 \-\-DataFieldsMode | |
| 1149 \& CompoundID \-\-CompoundIDMode MolnameOrLabelPrefix \-\-CompoundID Cmpd | |
| 1150 \& \-\-CompoundIDLabel MolID \-\-FingerprintsLabel MACCSKeyCountFP \-\-OutDelim | |
| 1151 \& Tab \-r SampleMACCS322FPCount \-o Sample.sdf | |
| 1152 .Ve | |
| 1153 .PP | |
| 1154 To generate \s-1MACCS\s0 keys fingerprints of size 166 corresponding to count of keys in | |
| 1155 ValuesString format and create a SampleMACCS166FPCount.csv file containing | |
| 1156 specific data fields columns along with fingerprints vector strings data, type: | |
| 1157 .PP | |
| 1158 .Vb 3 | |
| 1159 \& % MACCSKeysFingerprints.pl \-m MACCSKeyCount \-\-size 166 | |
| 1160 \& \-\-VectorStringFormat ValuesString \-\-DataFieldsMode Specify \-\-DataFields | |
| 1161 \& Mol_ID \-r SampleMACCS166FPCount \-o Sample.sdf | |
| 1162 .Ve | |
| 1163 .PP | |
| 1164 To generate \s-1MACCS\s0 keys fingerprints of size 322 corresponding to count of keys in | |
| 1165 ValuesString format and create a SampleMACCS322FPCount.csv file containing | |
| 1166 common data fields columns along with fingerprints vector strings data, type: | |
| 1167 .PP | |
| 1168 .Vb 3 | |
| 1169 \& % MACCSKeysFingerprints.pl \-m MACCSKeyCount \-\-size 322 | |
| 1170 \& \-\-VectorStringFormat ValuesString \-\-DataFieldsMode Common \-r | |
| 1171 \& SampleMACCS322FPCount \-o Sample.sdf | |
| 1172 .Ve | |
| 1173 .PP | |
| 1174 To generate \s-1MACCS\s0 keys fingerprints of size 166 corresponding to count of keys in | |
| 1175 ValuesString format and create SampleMACCS166FPCount.sdf, SampleMACCS166FPCount.fpf and | |
| 1176 SampleMACCS166FPCount.csv files containing all data fields columns in \s-1CSV\s0 file | |
| 1177 along with fingerprints vector strings data, type: | |
| 1178 .PP | |
| 1179 .Vb 3 | |
| 1180 \& % MACCSKeysFingerprints.pl \-m MACCSKeyCount \-\-size 166 \-\-output all | |
| 1181 \& \-\-VectorStringFormat ValuesString \-\-DataFieldsMode All \-r | |
| 1182 \& SampleMACCS166FPCount \-o Sample.sdf | |
| 1183 .Ve | |
| 1184 .SH "AUTHOR" | |
| 1185 .IX Header "AUTHOR" | |
| 1186 Manish Sud <msud@san.rr.com> | |
| 1187 .SH "SEE ALSO" | |
| 1188 .IX Header "SEE ALSO" | |
| 1189 InfoFingerprintsFiles.pl, SimilarityMatricesFingerprints.pl, AtomNeighborhoodsFingerprints.pl, | |
| 1190 ExtendedConnectivityFingerprints.pl, PathLengthFingerprints.pl, | |
| 1191 TopologicalAtomPairsFingerprints.pl, TopologicalAtomTorsionsFingerprints.pl, | |
| 1192 TopologicalPharmacophoreAtomPairsFingerprints.pl, TopologicalPharmacophoreAtomTripletsFingerprints.pl | |
| 1193 .SH "COPYRIGHT" | |
| 1194 .IX Header "COPYRIGHT" | |
| 1195 Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 1196 .PP | |
| 1197 This file is part of MayaChemTools. | |
| 1198 .PP | |
| 1199 MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 1200 the terms of the \s-1GNU\s0 Lesser General Public License as published by the Free | |
| 1201 Software Foundation; either version 3 of the License, or (at your option) | |
| 1202 any later version. |
