Mercurial > repos > deepakjadmin > mayatool3_test2
comparison docs/scripts/man1/ExtendedConnectivityFingerprints.1 @ 0:4816e4a8ae95 draft default tip
Uploaded
| author | deepakjadmin |
|---|---|
| date | Wed, 20 Jan 2016 09:23:18 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4816e4a8ae95 |
|---|---|
| 1 .\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.22) | |
| 2 .\" | |
| 3 .\" Standard preamble: | |
| 4 .\" ======================================================================== | |
| 5 .de Sp \" Vertical space (when we can't use .PP) | |
| 6 .if t .sp .5v | |
| 7 .if n .sp | |
| 8 .. | |
| 9 .de Vb \" Begin verbatim text | |
| 10 .ft CW | |
| 11 .nf | |
| 12 .ne \\$1 | |
| 13 .. | |
| 14 .de Ve \" End verbatim text | |
| 15 .ft R | |
| 16 .fi | |
| 17 .. | |
| 18 .\" Set up some character translations and predefined strings. \*(-- will | |
| 19 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left | |
| 20 .\" double quote, and \*(R" will give a right double quote. \*(C+ will | |
| 21 .\" give a nicer C++. Capital omega is used to do unbreakable dashes and | |
| 22 .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, | |
| 23 .\" nothing in troff, for use with C<>. | |
| 24 .tr \(*W- | |
| 25 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' | |
| 26 .ie n \{\ | |
| 27 . ds -- \(*W- | |
| 28 . ds PI pi | |
| 29 . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch | |
| 30 . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch | |
| 31 . ds L" "" | |
| 32 . ds R" "" | |
| 33 . ds C` "" | |
| 34 . ds C' "" | |
| 35 'br\} | |
| 36 .el\{\ | |
| 37 . ds -- \|\(em\| | |
| 38 . ds PI \(*p | |
| 39 . ds L" `` | |
| 40 . ds R" '' | |
| 41 'br\} | |
| 42 .\" | |
| 43 .\" Escape single quotes in literal strings from groff's Unicode transform. | |
| 44 .ie \n(.g .ds Aq \(aq | |
| 45 .el .ds Aq ' | |
| 46 .\" | |
| 47 .\" If the F register is turned on, we'll generate index entries on stderr for | |
| 48 .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index | |
| 49 .\" entries marked with X<> in POD. Of course, you'll have to process the | |
| 50 .\" output yourself in some meaningful fashion. | |
| 51 .ie \nF \{\ | |
| 52 . de IX | |
| 53 . tm Index:\\$1\t\\n%\t"\\$2" | |
| 54 .. | |
| 55 . nr % 0 | |
| 56 . rr F | |
| 57 .\} | |
| 58 .el \{\ | |
| 59 . de IX | |
| 60 .. | |
| 61 .\} | |
| 62 .\" | |
| 63 .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). | |
| 64 .\" Fear. Run. Save yourself. No user-serviceable parts. | |
| 65 . \" fudge factors for nroff and troff | |
| 66 .if n \{\ | |
| 67 . ds #H 0 | |
| 68 . ds #V .8m | |
| 69 . ds #F .3m | |
| 70 . ds #[ \f1 | |
| 71 . ds #] \fP | |
| 72 .\} | |
| 73 .if t \{\ | |
| 74 . ds #H ((1u-(\\\\n(.fu%2u))*.13m) | |
| 75 . ds #V .6m | |
| 76 . ds #F 0 | |
| 77 . ds #[ \& | |
| 78 . ds #] \& | |
| 79 .\} | |
| 80 . \" simple accents for nroff and troff | |
| 81 .if n \{\ | |
| 82 . ds ' \& | |
| 83 . ds ` \& | |
| 84 . ds ^ \& | |
| 85 . ds , \& | |
| 86 . ds ~ ~ | |
| 87 . ds / | |
| 88 .\} | |
| 89 .if t \{\ | |
| 90 . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" | |
| 91 . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' | |
| 92 . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' | |
| 93 . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' | |
| 94 . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' | |
| 95 . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' | |
| 96 .\} | |
| 97 . \" troff and (daisy-wheel) nroff accents | |
| 98 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' | |
| 99 .ds 8 \h'\*(#H'\(*b\h'-\*(#H' | |
| 100 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] | |
| 101 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' | |
| 102 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' | |
| 103 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] | |
| 104 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] | |
| 105 .ds ae a\h'-(\w'a'u*4/10)'e | |
| 106 .ds Ae A\h'-(\w'A'u*4/10)'E | |
| 107 . \" corrections for vroff | |
| 108 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' | |
| 109 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' | |
| 110 . \" for low resolution devices (crt and lpr) | |
| 111 .if \n(.H>23 .if \n(.V>19 \ | |
| 112 \{\ | |
| 113 . ds : e | |
| 114 . ds 8 ss | |
| 115 . ds o a | |
| 116 . ds d- d\h'-1'\(ga | |
| 117 . ds D- D\h'-1'\(hy | |
| 118 . ds th \o'bp' | |
| 119 . ds Th \o'LP' | |
| 120 . ds ae ae | |
| 121 . ds Ae AE | |
| 122 .\} | |
| 123 .rm #[ #] #H #V #F C | |
| 124 .\" ======================================================================== | |
| 125 .\" | |
| 126 .IX Title "EXTENDEDCONNECTIVITYFINGERPRINTS 1" | |
| 127 .TH EXTENDEDCONNECTIVITYFINGERPRINTS 1 "2015-03-29" "perl v5.14.2" "MayaChemTools" | |
| 128 .\" For nroff, turn off justification. Always turn off hyphenation; it makes | |
| 129 .\" way too many mistakes in technical documents. | |
| 130 .if n .ad l | |
| 131 .nh | |
| 132 .SH "NAME" | |
| 133 ExtendedConnectivityFingerprints.pl \- Generate extended connectivity fingerprints for SD files | |
| 134 .SH "SYNOPSIS" | |
| 135 .IX Header "SYNOPSIS" | |
| 136 ExtendedConnectivityFingerprints.pl SDFile(s)... | |
| 137 .PP | |
| 138 ExtendedConnectivityFingerprints.pl [\fB\-\-AromaticityModel\fR \fIAromaticityModelType\fR] | |
| 139 [\fB\-a, \-\-AtomIdentifierType\fR \fIAtomicInvariantsAtomTypes\fR] | |
| 140 [\fB\-\-AtomicInvariantsToUse\fR \fI\*(L"AtomicInvariant,AtomicInvariant...\*(R"\fR] | |
| 141 [\fB\-\-FunctionalClassesToUse\fR \fI\*(L"FunctionalClass1,FunctionalClass2...\*(R"\fR] | |
| 142 [\fB\-\-BitsOrder\fR \fIAscending | Descending\fR] [\fB\-b, \-\-BitStringFormat\fR \fIBinaryString | HexadecimalString\fR] | |
| 143 [\fB\-\-CompoundID\fR \fIDataFieldName or LabelPrefixString\fR] [\fB\-\-CompoundIDLabel\fR \fItext\fR] | |
| 144 [\fB\-\-CompoundIDMode\fR] [\fB\-\-DataFields\fR \fI\*(L"FieldLabel1,FieldLabel2,...\*(R"\fR] | |
| 145 [\fB\-d, \-\-DataFieldsMode\fR \fIAll | Common | Specify | CompoundID\fR] [\fB\-f, \-\-Filter\fR \fIYes | No\fR] | |
| 146 [\fB\-\-FingerprintsLabel\fR \fItext\fR] [\fB\-h, \-\-help\fR] [\fB\-k, \-\-KeepLargestComponent\fR \fIYes | No\fR] | |
| 147 [\fB\-m, \-\-mode\fR \fIExtendedConnectivity | ExtendedConnectivityCount | ExtendedConnectivityBits\fR] | |
| 148 [\fB\-n, \-\-NeighborhoodRadius\fR \fInumber\fR] [\fB\-\-OutDelim\fR \fIcomma | tab | semicolon\fR] [\fB\-\-output\fR \fI\s-1SD\s0 | \s-1FP\s0 | text | all\fR] | |
| 149 [\fB\-o, \-\-overwrite\fR] [\fB\-q, \-\-quote\fR \fIYes | No\fR] [\fB\-r, \-\-root\fR \fIRootName\fR] [\fB\-s, \-\-size\fR \fInumber\fR] | |
| 150 [\fB\-\-UsePerlCoreRandom\fR \fIYes | No\fR] | |
| 151 [\fB\-v, \-\-VectorStringFormat\fR \fIIDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString\fR] | |
| 152 [\fB\-w, \-\-WorkingDir\fR dirname] SDFile(s)... | |
| 153 .SH "DESCRIPTION" | |
| 154 .IX Header "DESCRIPTION" | |
| 155 Generate extended connectivity fingerprints [ Ref 48, Ref 52 ] for \fISDFile(s)\fR and create appropriate | |
| 156 \&\s-1SD\s0, \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) containing fingerprints vector strings corresponding to molecular fingerprints. | |
| 157 .PP | |
| 158 Multiple SDFile names are separated by spaces. The valid file extensions are \fI.sdf\fR | |
| 159 and \fI.sd\fR. All other file names are ignored. All the \s-1SD\s0 files in a current directory | |
| 160 can be specified either by \fI*.sdf\fR or the current directory name. | |
| 161 .PP | |
| 162 The current release of MayaChemTools supports generation of extended connectivity fingerprints | |
| 163 corresponding to the following \fB\-a, \-\-AtomIdentifierType\fR values: | |
| 164 .PP | |
| 165 .Vb 3 | |
| 166 \& AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, | |
| 167 \& FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, | |
| 168 \& SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes | |
| 169 .Ve | |
| 170 .PP | |
| 171 Based on values specified for \fB\-a, \-\-AtomIdentifierType\fR, \fB\-\-AtomicInvariantsToUse\fR | |
| 172 and \fB\-\-FunctionalClassesToUse\fR, initial atom types are assigned to all non-hydrogen atoms in | |
| 173 a molecule and these atom types strings are converted into initial atom identifier integers using | |
| 174 \&\fBTextUtil::HashCode\fR function. The duplicate atom identifiers are removed. | |
| 175 .PP | |
| 176 For \fB\-n, \-\-NeighborhoodRadius\fR value of \fI0\fR, the initial set of unique atom identifiers comprises | |
| 177 the molecule fingerprints. Otherwise, atom neighborhoods are generated for each non-hydrogen | |
| 178 atom up to specified \fB\-n, \-\-NeighborhoodRadius\fR value. For each non-hydrogen central atom | |
| 179 at a specific radius, its neighbors at next radius level along with their bond orders and previously | |
| 180 calculated atom identifiers are collected which in turn are used to generate a new integer | |
| 181 atom identifier; the bond orders and atom identifier pairs list is first sorted by bond order | |
| 182 followed by atom identifiers to make these values graph invariant. | |
| 183 .PP | |
| 184 After integer atom identifiers have been generated for all non-hydrogen atoms at all specified | |
| 185 neighborhood radii, the duplicate integer atom identifiers corresponding to same hash code | |
| 186 value generated using \fBTextUtil::HashCode\fR are tracked by keeping the atom identifiers at | |
| 187 lower radius. Additionally, all structurally duplicate integer atom identifiers at each specified | |
| 188 radius are also tracked by identifying equivalent atoms and bonds corresponding to substructures | |
| 189 used for generating atom identifier and keeping integer atom identifier with lowest value. | |
| 190 .PP | |
| 191 For \fIExtendedConnectivity\fR value of fingerprints \fB\-m, \-\-mode\fR, the duplicate identifiers are | |
| 192 removed from the list and the unique atom identifiers constitute the extended connectivity | |
| 193 fingerprints of a molecule. | |
| 194 .PP | |
| 195 For \fIExtendedConnectivityCount\fR value of fingerprints \fB\-m, \-\-mode\fR, the occurrences of each | |
| 196 unique atom identifier are counted and the unique atom identifiers along with their | |
| 197 count constitute the extended connectivity fingerprints of a molecule. | |
| 198 .PP | |
| 199 For \fIExtendedConnectivityBits\fR value of fingerprints \fB\-m, \-\-mode\fR, the unique atom identifiers | |
| 200 are used as a random number seed to generate a random integer value between 0 and \fB\-\-Size\fR which | |
| 201 in turn is used to set corresponding bits in the fingerprint bit-vector string. | |
| 202 .PP | |
| 203 Example of \fI\s-1SD\s0\fR file containing extended connectivity fingerprints string data: | |
| 204 .PP | |
| 205 .Vb 10 | |
| 206 \& ... ... | |
| 207 \& ... ... | |
| 208 \& $$$$ | |
| 209 \& ... ... | |
| 210 \& ... ... | |
| 211 \& ... ... | |
| 212 \& 41 44 0 0 0 0 0 0 0 0999 V2000 | |
| 213 \& \-3.3652 1.4499 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 | |
| 214 \& ... ... | |
| 215 \& 2 3 1 0 0 0 0 | |
| 216 \& ... ... | |
| 217 \& M END | |
| 218 \& > <CmpdID> | |
| 219 \& Cmpd1 | |
| 220 \& | |
| 221 \& > <ExtendedConnectivityFingerprints> | |
| 222 \& FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radiu | |
| 223 \& s2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391 66 | |
| 224 \& 6191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414087 | |
| 225 \& 99 49532520 64643108 79385615 96062769 273726379 564565671 855141035 90 | |
| 226 \& 6706094 988546669 1018231313 1032696425 1197507444 1331250018 133853... | |
| 227 \& | |
| 228 \& $$$$ | |
| 229 \& ... ... | |
| 230 \& ... ... | |
| 231 .Ve | |
| 232 .PP | |
| 233 Example of \fI\s-1FP\s0\fR file containing extended connectivity fingerprints string data: | |
| 234 .PP | |
| 235 .Vb 10 | |
| 236 \& # | |
| 237 \& # Package = MayaChemTools 7.4 | |
| 238 \& # Release Date = Oct 21, 2010 | |
| 239 \& # | |
| 240 \& # TimeStamp = Fri Mar 11 14:43:57 2011 | |
| 241 \& # | |
| 242 \& # FingerprintsStringType = FingerprintsVector | |
| 243 \& # | |
| 244 \& # Description = ExtendedConnectivity:AtomicInvariantsAtomTypes:Radius2 | |
| 245 \& # VectorStringFormat = ValuesString | |
| 246 \& # VectorValuesType = AlphaNumericalValues | |
| 247 \& # | |
| 248 \& Cmpd1 60;73555770 333564680 352413391 666191900 1001270906 137167432... | |
| 249 \& Cmpd2 41;73555770 333564680 666191900 1142173602 1363635752 14814699... | |
| 250 \& ... ... | |
| 251 \& ... .. | |
| 252 .Ve | |
| 253 .PP | |
| 254 Example of \s-1CSV\s0 \fIText\fR file containing extended connectivity fingerprints string data: | |
| 255 .PP | |
| 256 .Vb 8 | |
| 257 \& "CompoundID","ExtendedConnectivityFingerprints" | |
| 258 \& "Cmpd1","FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTy | |
| 259 \& pes:Radius2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352 | |
| 260 \& 413391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 | |
| 261 \& 2141408799 49532520 64643108 79385615 96062769 273726379 564565671 8551 | |
| 262 \& 41035 906706094 988546669 1018231313 1032696425 1197507444 13312500..." | |
| 263 \& ... ... | |
| 264 \& ... ... | |
| 265 .Ve | |
| 266 .PP | |
| 267 The current release of MayaChemTools generates the following types of extended connectivity | |
| 268 fingerprints vector strings: | |
| 269 .PP | |
| 270 .Vb 6 | |
| 271 \& FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radi | |
| 272 \& us2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391 | |
| 273 \& 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414 | |
| 274 \& 08799 49532520 64643108 79385615 96062769 273726379 564565671 85514103 | |
| 275 \& 5 906706094 988546669 1018231313 1032696425 1197507444 1331250018 1338 | |
| 276 \& 532734 1455473691 1607485225 1609687129 1631614296 1670251330 17303... | |
| 277 \& | |
| 278 \& FingerprintsVector;ExtendedConnectivityCount:AtomicInvariantsAtomTypes | |
| 279 \& :Radius2;60;NumericalValues;IDsAndValuesString;73555770 333564680 3524 | |
| 280 \& 13391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 | |
| 281 \& 2141408799 49532520 64643108 79385615 96062769 273726379 564565671...; | |
| 282 \& 3 2 1 1 14 1 2 10 4 3 1 1 1 1 2 1 2 1 1 1 2 3 1 1 2 1 3 3 8 2 2 2 6 2 | |
| 283 \& 1 2 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1 | |
| 284 \& | |
| 285 \& FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp | |
| 286 \& es:Radius2;1024;BinaryString;Ascending;0000000000000000000000000000100 | |
| 287 \& 0000000001010000000110000011000000000000100000000000000000000000100001 | |
| 288 \& 1000000110000000000000000000000000010011000000000000000000000000010000 | |
| 289 \& 0000000000000000000000000010000000000000000001000000000000000000000000 | |
| 290 \& 0000000000010000100001000000000000101000000000000000100000000000000... | |
| 291 \& | |
| 292 \& FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp | |
| 293 \& es:Radius2;1024;HexadecimalString;Ascending;000000010050c0600800000803 | |
| 294 \& 0300000091000004000000020000100000000124008200020000000040020000000000 | |
| 295 \& 2080000000820040010020000000008040000000000080001000000000400000000000 | |
| 296 \& 4040000090000061010000000800200000000000001400000000020080000000000020 | |
| 297 \& 00008020200000408000 | |
| 298 \& | |
| 299 \& FingerprintsVector;ExtendedConnectivity:FunctionalClassAtomTypes:Radiu | |
| 300 \& s2;57;AlphaNumericalValues;ValuesString;24769214 508787397 850393286 8 | |
| 301 \& 62102353 981185303 1231636850 1649386610 1941540674 263599683 32920567 | |
| 302 \& 1 571109041 639579325 683993318 723853089 810600886 885767127 90326012 | |
| 303 \& 7 958841485 981022393 1126908698 1152248391 1317567065 1421489994 1455 | |
| 304 \& 632544 1557272891 1826413669 1983319256 2015750777 2029559552 20404... | |
| 305 \& | |
| 306 \& FingerprintsVector;ExtendedConnectivityCount:FunctionalClassAtomTypes: | |
| 307 \& Radius2;57;NumericalValues;IDsAndValuesString;24769214 508787397 85039 | |
| 308 \& 3286 862102353 981185303 1231636850 1649386610 1941540674 263599683 32 | |
| 309 \& 9205671 571109041 639579325 683993318 723853089 810600886 885767127...; | |
| 310 \& 1 1 1 10 2 22 3 1 3 3 1 1 1 3 2 2 1 2 2 2 3 1 1 1 1 1 14 1 1 1 1 1 1 2 | |
| 311 \& 1 2 1 1 2 2 1 1 2 1 1 1 2 1 1 2 1 1 1 1 1 1 1 | |
| 312 \& | |
| 313 \& FingerprintsBitVector;ExtendedConnectivityBits:FunctionalClassAtomType | |
| 314 \& s:Radius2;1024;BinaryString;Ascending;00000000000000000000100000000000 | |
| 315 \& 0000000001000100000000001000000000000000000000000000000000101000000010 | |
| 316 \& 0000001000000000010000000000000000000000000000000000000000000000000100 | |
| 317 \& 0000000000001000000000000001000000000001001000000000000000000000000000 | |
| 318 \& 0000000000000000100000000000001000000000000000000000000000000000000... | |
| 319 \& | |
| 320 \& FingerprintsVector;ExtendedConnectivity:DREIDINGAtomTypes:Radius2;56;A | |
| 321 \& lphaNumericalValues;ValuesString;280305427 357928343 721790579 1151822 | |
| 322 \& 898 1207111054 1380963747 1568213839 1603445250 4559268 55012922 18094 | |
| 323 \& 0813 335715751 534801009 684609658 829361048 972945982 999881534 10076 | |
| 324 \& 55741 1213692591 1222032501 1224517934 1235687794 1244268533 152812070 | |
| 325 \& 0 1629595024 1856308891 1978806036 2001865095 2096549435 172675415 ... | |
| 326 \& | |
| 327 \& FingerprintsVector;ExtendedConnectivity:EStateAtomTypes:Radius2;62;Alp | |
| 328 \& haNumericalValues;ValuesString;25189973 528584866 662581668 671034184 | |
| 329 \& 926543080 1347067490 1738510057 1759600920 2034425745 2097234755 21450 | |
| 330 \& 44754 96779665 180364292 341712110 345278822 386540408 387387308 50430 | |
| 331 \& 1706 617094135 771528807 957666640 997798220 1158349170 1291258082 134 | |
| 332 \& 1138533 1395329837 1420277211 1479584608 1486476397 1487556246 1566... | |
| 333 \& | |
| 334 \& FingerprintsVector;ExtendedConnectivity:MMFF94AtomTypes:Radius2;64;Alp | |
| 335 \& haNumericalValues;ValuesString;224051550 746527773 998750766 103704190 | |
| 336 \& 2 1239701709 1248384926 1259447756 1521678386 1631549126 1909437580 20 | |
| 337 \& 37095052 2104274756 2117729376 8770364 31445800 81450228 314289324 344 | |
| 338 \& 041929 581773587 638555787 692022098 811840536 929651561 936421792 988 | |
| 339 \& 636432 1048624296 1054288509 1369487579 1454058929 1519352190 17271... | |
| 340 \& | |
| 341 \& FingerprintsVector;ExtendedConnectivity:SLogPAtomTypes:Radius2;71;Alph | |
| 342 \& aNumericalValues;ValuesString;78989290 116507218 489454042 888737940 1 | |
| 343 \& 162561799 1241797255 1251494264 1263717127 1471206899 1538061784 17654 | |
| 344 \& 07295 1795036542 1809833874 2020454493 2055310842 2117729376 11868981 | |
| 345 \& 56731842 149505242 184525155 196984339 288181334 481409282 556716568 6 | |
| 346 \& 41915747 679881756 721736571 794256218 908276640 992898760 10987549... | |
| 347 \& | |
| 348 \& FingerprintsVector;ExtendedConnectivity:SYBYLAtomTypes:Radius2;58;Alph | |
| 349 \& aNumericalValues;ValuesString;199957044 313356892 455463968 465982819 | |
| 350 \& 1225318176 1678585943 1883366064 1963811677 2117729376 113784599 19153 | |
| 351 \& 8837 196629033 263865277 416380653 477036669 681527491 730724924 90906 | |
| 352 \& 5537 1021959189 1133014972 1174311016 1359441203 1573452838 1661585138 | |
| 353 \& 1668649038 1684198062 1812312554 1859266290 1891651106 2072549404 ... | |
| 354 \& | |
| 355 \& FingerprintsVector;ExtendedConnectivity:TPSAAtomTypes:Radius2;47;Alpha | |
| 356 \& NumericalValues;ValuesString;20818206 259344053 862102353 1331904542 1 | |
| 357 \& 700688206 265614156 363161397 681332588 810600886 885767127 950172500 | |
| 358 \& 951454814 1059668746 1247054493 1382302230 1399502637 1805025917 19189 | |
| 359 \& 39561 2114677228 2126402271 8130483 17645742 32278373 149975755 160327 | |
| 360 \& 654 256360355 279492740 291251259 317592700 333763396 972105960 101... | |
| 361 \& | |
| 362 \& FingerprintsVector;ExtendedConnectivity:UFFAtomTypes:Radius2;56;AlphaN | |
| 363 \& umericalValues;ValuesString;280305427 357928343 721790579 1151822898 1 | |
| 364 \& 207111054 1380963747 1568213839 1603445250 4559268 55012922 180940813 | |
| 365 \& 335715751 534801009 684609658 829361048 972945982 999881534 1007655741 | |
| 366 \& 1213692591 1222032501 1224517934 1235687794 1244268533 1528120700 162 | |
| 367 \& 9595024 1856308891 1978806036 2001865095 2096549435 172675415 18344... | |
| 368 .Ve | |
| 369 .SH "OPTIONS" | |
| 370 .IX Header "OPTIONS" | |
| 371 .IP "\fB\-\-AromaticityModel\fR \fIMDLAromaticityModel | TriposAromaticityModel | MMFFAromaticityModel | ChemAxonBasicAromaticityModel | ChemAxonGeneralAromaticityModel | DaylightAromaticityModel | MayaChemToolsAromaticityModel\fR" 4 | |
| 372 .IX Item "--AromaticityModel MDLAromaticityModel | TriposAromaticityModel | MMFFAromaticityModel | ChemAxonBasicAromaticityModel | ChemAxonGeneralAromaticityModel | DaylightAromaticityModel | MayaChemToolsAromaticityModel" | |
| 373 Specify aromaticity model to use during detection of aromaticity. Possible values in the current | |
| 374 release are: \fIMDLAromaticityModel, TriposAromaticityModel, MMFFAromaticityModel, | |
| 375 ChemAxonBasicAromaticityModel, ChemAxonGeneralAromaticityModel, DaylightAromaticityModel | |
| 376 or MayaChemToolsAromaticityModel\fR. Default value: \fIMayaChemToolsAromaticityModel\fR. | |
| 377 .Sp | |
| 378 The supported aromaticity model names along with model specific control parameters | |
| 379 are defined in \fBAromaticityModelsData.csv\fR, which is distributed with the current release | |
| 380 and is available under \fBlib/data\fR directory. \fBMolecule.pm\fR module retrieves data from | |
| 381 this file during class instantiation and makes it available to method \fBDetectAromaticity\fR | |
| 382 for detecting aromaticity corresponding to a specific model. | |
| 383 .IP "\fB\-a, \-\-AtomIdentifierType\fR \fIAtomicInvariantsAtomTypes | FunctionalClassAtomTypes | DREIDINGAtomTypes | EStateAtomTypes | MMFF94AtomTypes | SLogPAtomTypes | SYBYLAtomTypes | TPSAAtomTypes | UFFAtomTypes\fR" 4 | |
| 384 .IX Item "-a, --AtomIdentifierType AtomicInvariantsAtomTypes | FunctionalClassAtomTypes | DREIDINGAtomTypes | EStateAtomTypes | MMFF94AtomTypes | SLogPAtomTypes | SYBYLAtomTypes | TPSAAtomTypes | UFFAtomTypes" | |
| 385 Specify atom identifier type to use for assignment of initial atom identifier to non-hydrogen | |
| 386 atoms during calculation of extended connectivity fingerprints [ Ref 48, Ref 52]. Possible values | |
| 387 in the current release are: \fIAtomicInvariantsAtomTypes, FunctionalClassAtomTypes, | |
| 388 DREIDINGAtomTypes, EStateAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, | |
| 389 TPSAAtomTypes, UFFAtomTypes\fR. Default value: \fIAtomicInvariantsAtomTypes\fR. | |
| 390 .ie n .IP "\fB\-\-AtomicInvariantsToUse\fR \fI""AtomicInvariant,AtomicInvariant...""\fR" 4 | |
| 391 .el .IP "\fB\-\-AtomicInvariantsToUse\fR \fI``AtomicInvariant,AtomicInvariant...''\fR" 4 | |
| 392 .IX Item "--AtomicInvariantsToUse AtomicInvariant,AtomicInvariant..." | |
| 393 This value is used during \fIAtomicInvariantsAtomTypes\fR value of \fB\-a, \-\-AtomIdentifierType\fR | |
| 394 option. It's a list of comma separated valid atomic invariant atom types. | |
| 395 .Sp | |
| 396 Possible values for atomic invariants are: \fI\s-1AS\s0, X, \s-1BO\s0, \s-1LBO\s0, \s-1SB\s0, \s-1DB\s0, \s-1TB\s0, | |
| 397 H, Ar, \s-1RA\s0, \s-1FC\s0, \s-1MN\s0, \s-1SM\s0\fR. Default value [ Ref 24 ]: \fI\s-1AS\s0,X,BO,H,FC,MN\fR. | |
| 398 .Sp | |
| 399 The atomic invariants abbreviations correspond to: | |
| 400 .Sp | |
| 401 .Vb 1 | |
| 402 \& AS = Atom symbol corresponding to element symbol | |
| 403 \& | |
| 404 \& X<n> = Number of non\-hydrogen atom neighbors or heavy atoms | |
| 405 \& BO<n> = Sum of bond orders to non\-hydrogen atom neighbors or heavy atoms | |
| 406 \& LBO<n> = Largest bond order of non\-hydrogen atom neighbors or heavy atoms | |
| 407 \& SB<n> = Number of single bonds to non\-hydrogen atom neighbors or heavy atoms | |
| 408 \& DB<n> = Number of double bonds to non\-hydrogen atom neighbors or heavy atoms | |
| 409 \& TB<n> = Number of triple bonds to non\-hydrogen atom neighbors or heavy atoms | |
| 410 \& H<n> = Number of implicit and explicit hydrogens for atom | |
| 411 \& Ar = Aromatic annotation indicating whether atom is aromatic | |
| 412 \& RA = Ring atom annotation indicating whether atom is in a ring | |
| 413 \& FC<+n/\-n> = Formal charge assigned to atom | |
| 414 \& MN<n> = Mass number indicating isotope other than most abundant isotope | |
| 415 \& SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or | |
| 416 \& 3 (triplet) | |
| 417 .Ve | |
| 418 .Sp | |
| 419 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to: | |
| 420 .Sp | |
| 421 .Vb 1 | |
| 422 \& AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/\-n>.MN<n>.SM<n> | |
| 423 .Ve | |
| 424 .Sp | |
| 425 Except for \s-1AS\s0 which is a required atomic invariant in atom types, all other atomic invariants are | |
| 426 optional. Atom type specification doesn't include atomic invariants with zero or undefined values. | |
| 427 .Sp | |
| 428 In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words | |
| 429 are also allowed: | |
| 430 .Sp | |
| 431 .Vb 12 | |
| 432 \& X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors | |
| 433 \& BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms | |
| 434 \& LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms | |
| 435 \& SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms | |
| 436 \& DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms | |
| 437 \& TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms | |
| 438 \& H : NumOfImplicitAndExplicitHydrogens | |
| 439 \& Ar : Aromatic | |
| 440 \& RA : RingAtom | |
| 441 \& FC : FormalCharge | |
| 442 \& MN : MassNumber | |
| 443 \& SM : SpinMultiplicity | |
| 444 .Ve | |
| 445 .Sp | |
| 446 \&\fIAtomTypes::AtomicInvariantsAtomTypes\fR module is used to assign atomic invariant | |
| 447 atom types. | |
| 448 .IP "\fB\-\-BitsOrder\fR \fIAscending | Descending\fR" 4 | |
| 449 .IX Item "--BitsOrder Ascending | Descending" | |
| 450 Bits order to use during generation of fingerprints bit-vector string for \fIExtendedConnectivityBits\fR | |
| 451 value of \fB\-m, \-\-mode\fR option. Possible values: \fIAscending, Descending\fR. Default: \fIAscending\fR. | |
| 452 .Sp | |
| 453 \&\fIAscending\fR bit order corresponds to the first bit in each byte being the lowest bit as | |
| 454 opposed to the highest bit. | |
| 455 .Sp | |
| 456 Internally, bits are stored in \fIAscending\fR order using Perl vec function. Regardless | |
| 457 of machine order, big-endian or little-endian, vec function always considers first | |
| 458 string byte as the lowest byte and first bit within each byte as the lowest bit. | |
| 459 .IP "\fB\-b, \-\-BitStringFormat\fR \fIBinaryString | HexadecimalString\fR" 4 | |
| 460 .IX Item "-b, --BitStringFormat BinaryString | HexadecimalString" | |
| 461 Format of fingerprints bit-vector string data in output \s-1SD\s0, \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) specified by | |
| 462 \&\fB\-\-output\fR used during \fIExtendedConnectivityBits\fR value of \fB\-m, \-\-mode\fR option. Possible | |
| 463 values: \fIBinaryString, HexadecimalString\fR. Default value: \fIBinaryString\fR. | |
| 464 .Sp | |
| 465 \&\fIBinaryString\fR corresponds to an \s-1ASCII\s0 string containing 1s and 0s. \fIHexadecimalString\fR | |
| 466 contains bit values in \s-1ASCII\s0 hexadecimal format. | |
| 467 .Sp | |
| 468 Examples: | |
| 469 .Sp | |
| 470 .Vb 6 | |
| 471 \& FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp | |
| 472 \& es:Radius2;1024;BinaryString;Ascending;0000000000000000000000000000100 | |
| 473 \& 0000000001010000000110000011000000000000100000000000000000000000100001 | |
| 474 \& 1000000110000000000000000000000000010011000000000000000000000000010000 | |
| 475 \& 0000000000000000000000000010000000000000000001000000000000000000000000 | |
| 476 \& 0000000000010000100001000000000000101000000000000000100000000000000... | |
| 477 \& | |
| 478 \& FingerprintsBitVector;ExtendedConnectivityBits:FunctionalClassAtomType | |
| 479 \& s:Radius2;1024;BinaryString;Ascending;00000000000000000000100000000000 | |
| 480 \& 0000000001000100000000001000000000000000000000000000000000101000000010 | |
| 481 \& 0000001000000000010000000000000000000000000000000000000000000000000100 | |
| 482 \& 0000000000001000000000000001000000000001001000000000000000000000000000 | |
| 483 \& 0000000000000000100000000000001000000000000000000000000000000000000... | |
| 484 .Ve | |
| 485 .ie n .IP "\fB\-\-FunctionalClassesToUse\fR \fI""FunctionalClass1,FunctionalClass2...""\fR" 4 | |
| 486 .el .IP "\fB\-\-FunctionalClassesToUse\fR \fI``FunctionalClass1,FunctionalClass2...''\fR" 4 | |
| 487 .IX Item "--FunctionalClassesToUse FunctionalClass1,FunctionalClass2..." | |
| 488 This value is used during \fIFunctionalClassAtomTypes\fR value of \fB\-a, \-\-AtomIdentifierType\fR | |
| 489 option. It's a list of comma separated valid functional classes. | |
| 490 .Sp | |
| 491 Possible values for atom functional classes are: \fIAr, \s-1CA\s0, H, \s-1HBA\s0, \s-1HBD\s0, Hal, \s-1NI\s0, \s-1PI\s0, \s-1RA\s0\fR. | |
| 492 Default value [ Ref 24 ]: \fI\s-1HBD\s0,HBA,PI,NI,Ar,Hal\fR. | |
| 493 .Sp | |
| 494 The functional class abbreviations correspond to: | |
| 495 .Sp | |
| 496 .Vb 9 | |
| 497 \& HBD: HydrogenBondDonor | |
| 498 \& HBA: HydrogenBondAcceptor | |
| 499 \& PI : PositivelyIonizable | |
| 500 \& NI : NegativelyIonizable | |
| 501 \& Ar : Aromatic | |
| 502 \& Hal : Halogen | |
| 503 \& H : Hydrophobic | |
| 504 \& RA : RingAtom | |
| 505 \& CA : ChainAtom | |
| 506 \& | |
| 507 \& Functional class atom type specification for an atom corresponds to: | |
| 508 \& | |
| 509 \& Ar.CA.H.HBA.HBD.Hal.NI.PI.RA | |
| 510 .Ve | |
| 511 .Sp | |
| 512 \&\fIAtomTypes::FunctionalClassAtomTypes\fR module is used to assign functional class atom | |
| 513 types. It uses following definitions [ Ref 60\-61, Ref 65\-66 ]: | |
| 514 .Sp | |
| 515 .Vb 4 | |
| 516 \& HydrogenBondDonor: NH, NH2, OH | |
| 517 \& HydrogenBondAcceptor: N[!H], O | |
| 518 \& PositivelyIonizable: +, NH2 | |
| 519 \& NegativelyIonizable: \-, C(=O)OH, S(=O)OH, P(=O)OH | |
| 520 .Ve | |
| 521 .IP "\fB\-\-CompoundID\fR \fIDataFieldName or LabelPrefixString\fR" 4 | |
| 522 .IX Item "--CompoundID DataFieldName or LabelPrefixString" | |
| 523 This value is \fB\-\-CompoundIDMode\fR specific and indicates how compound \s-1ID\s0 is generated. | |
| 524 .Sp | |
| 525 For \fIDataField\fR value of \fB\-\-CompoundIDMode\fR option, it corresponds to datafield label name | |
| 526 whose value is used as compound \s-1ID\s0; otherwise, it's a prefix string used for generating compound | |
| 527 IDs like LabelPrefixString<Number>. Default value, \fICmpd\fR, generates compound IDs which | |
| 528 look like Cmpd<Number>. | |
| 529 .Sp | |
| 530 Examples for \fIDataField\fR value of \fB\-\-CompoundIDMode\fR: | |
| 531 .Sp | |
| 532 .Vb 2 | |
| 533 \& MolID | |
| 534 \& ExtReg | |
| 535 .Ve | |
| 536 .Sp | |
| 537 Examples for \fILabelPrefix\fR or \fIMolNameOrLabelPrefix\fR value of \fB\-\-CompoundIDMode\fR: | |
| 538 .Sp | |
| 539 .Vb 1 | |
| 540 \& Compound | |
| 541 .Ve | |
| 542 .Sp | |
| 543 The value specified above generates compound IDs which correspond to Compound<Number> | |
| 544 instead of default value of Cmpd<Number>. | |
| 545 .IP "\fB\-\-CompoundIDLabel\fR \fItext\fR" 4 | |
| 546 .IX Item "--CompoundIDLabel text" | |
| 547 Specify compound \s-1ID\s0 column label for \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) used during \fICompoundID\fR value | |
| 548 of \fB\-\-DataFieldsMode\fR option. Default: \fICompoundID\fR. | |
| 549 .IP "\fB\-\-CompoundIDMode\fR \fIDataField | MolName | LabelPrefix | MolNameOrLabelPrefix\fR" 4 | |
| 550 .IX Item "--CompoundIDMode DataField | MolName | LabelPrefix | MolNameOrLabelPrefix" | |
| 551 Specify how to generate compound IDs and write to \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) along with generated | |
| 552 fingerprints for \fI\s-1FP\s0 | text | all\fR values of \fB\-\-output\fR option: use a \fISDFile(s)\fR datafield value; | |
| 553 use molname line from \fISDFile(s)\fR; generate a sequential \s-1ID\s0 with specific prefix; use combination | |
| 554 of both MolName and LabelPrefix with usage of LabelPrefix values for empty molname lines. | |
| 555 .Sp | |
| 556 Possible values: \fIDataField | MolName | LabelPrefix | MolNameOrLabelPrefix\fR. | |
| 557 Default: \fILabelPrefix\fR. | |
| 558 .Sp | |
| 559 For \fIMolNameOrLabelPrefix\fR value of \fB\-\-CompoundIDMode\fR, molname line in \fISDFile(s)\fR takes | |
| 560 precedence over sequential compound IDs generated using \fILabelPrefix\fR and only empty molname | |
| 561 values are replaced with sequential compound IDs. | |
| 562 .Sp | |
| 563 This is only used for \fICompoundID\fR value of \fB\-\-DataFieldsMode\fR option. | |
| 564 .ie n .IP "\fB\-\-DataFields\fR \fI""FieldLabel1,FieldLabel2,...""\fR" 4 | |
| 565 .el .IP "\fB\-\-DataFields\fR \fI``FieldLabel1,FieldLabel2,...''\fR" 4 | |
| 566 .IX Item "--DataFields FieldLabel1,FieldLabel2,..." | |
| 567 Comma delimited list of \fISDFile(s)\fR data fields to extract and write to \s-1CSV/TSV\s0 text file(s) along | |
| 568 with generated fingerprints for \fItext | all\fR values of \fB\-\-output\fR option. | |
| 569 .Sp | |
| 570 This is only used for \fISpecify\fR value of \fB\-\-DataFieldsMode\fR option. | |
| 571 .Sp | |
| 572 Examples: | |
| 573 .Sp | |
| 574 .Vb 2 | |
| 575 \& Extreg | |
| 576 \& MolID,CompoundName | |
| 577 .Ve | |
| 578 .IP "\fB\-d, \-\-DataFieldsMode\fR \fIAll | Common | Specify | CompoundID\fR" 4 | |
| 579 .IX Item "-d, --DataFieldsMode All | Common | Specify | CompoundID" | |
| 580 Specify how data fields in \fISDFile(s)\fR are transferred to output \s-1CSV/TSV\s0 text file(s) along | |
| 581 with generated fingerprints for \fItext | all\fR values of \fB\-\-output\fR option: transfer all \s-1SD\s0 | |
| 582 data fields; transfer \s-1SD\s0 data fields common to all compounds; extract specified data fields; | |
| 583 generate a compound \s-1ID\s0 using molname line, a compound prefix, or a combination of both. | |
| 584 Possible values: \fIAll | Common | Specify | CompoundID\fR. Default value: \fICompoundID\fR. | |
| 585 .IP "\fB\-f, \-\-Filter\fR \fIYes | No\fR" 4 | |
| 586 .IX Item "-f, --Filter Yes | No" | |
| 587 Specify whether to check and filter compound data in SDFile(s). Possible values: \fIYes or No\fR. | |
| 588 Default value: \fIYes\fR. | |
| 589 .Sp | |
| 590 By default, compound data is checked before calculating fingerprints and compounds containing | |
| 591 atom data corresponding to non-element symbols or no atom data are ignored. | |
| 592 .IP "\fB\-\-FingerprintsLabel\fR \fItext\fR" 4 | |
| 593 .IX Item "--FingerprintsLabel text" | |
| 594 \&\s-1SD\s0 data label or text file column label to use for fingerprints string in output \s-1SD\s0 or | |
| 595 \&\s-1CSV/TSV\s0 text file(s) specified by \fB\-\-output\fR. Default value: \fIExtendedConnectivityFingerprints\fR. | |
| 596 .IP "\fB\-h, \-\-help\fR" 4 | |
| 597 .IX Item "-h, --help" | |
| 598 Print this help message. | |
| 599 .IP "\fB\-k, \-\-KeepLargestComponent\fR \fIYes | No\fR" 4 | |
| 600 .IX Item "-k, --KeepLargestComponent Yes | No" | |
| 601 Generate fingerprints for only the largest component in molecule. Possible values: | |
| 602 \&\fIYes or No\fR. Default value: \fIYes\fR. | |
| 603 .Sp | |
| 604 For molecules containing multiple connected components, fingerprints can be generated | |
| 605 in two different ways: use all connected components or just the largest connected | |
| 606 component. By default, all atoms except for the largest connected component are | |
| 607 deleted before generation of fingerprints. | |
| 608 .IP "\fB\-m, \-\-mode\fR \fIExtendedConnectivity | ExtendedConnectivityCount | ExtendedConnectivityBits\fR" 4 | |
| 609 .IX Item "-m, --mode ExtendedConnectivity | ExtendedConnectivityCount | ExtendedConnectivityBits" | |
| 610 Specify type of extended connectivity fingerprints to generate for molecules in \fISDFile(s)\fR. | |
| 611 Possible values: \fIExtendedConnectivity, ExtendedConnectivityCount or | |
| 612 ExtendedConnectivityBits\fR. Default value: \fIExtendedConnectivity\fR. | |
| 613 .Sp | |
| 614 For \fIExtendedConnectivity\fR value of fingerprints \fB\-m, \-\-mode\fR, a fingerprint vector | |
| 615 containing unique atom identifiers constitutes the extended connectivity fingerprints | |
| 616 of a molecule. | |
| 617 .Sp | |
| 618 For \fIExtendedConnectivityCount\fR value of fingerprints \fB\-m, \-\-mode\fR, a fingerprint vector | |
| 619 containing unique atom identifiers along with their count constitutes the extended connectivity | |
| 620 fingerprints of a molecule. | |
| 621 .Sp | |
| 622 For \fIExtendedConnectivityBits\fR value of fingerprints \fB\-m, \-\-mode\fR, a fingerprint bit vector | |
| 623 indicating presence/absence of structurally unique atom identifiers constitutes the extended | |
| 624 connectivity fingerprints of a molecule. | |
| 625 .IP "\fB\-n, \-\-NeighborhoodRadius\fR \fInumber\fR" 4 | |
| 626 .IX Item "-n, --NeighborhoodRadius number" | |
| 627 Atomic neighborhood radius for generating extended connectivity neighborhoods. Default | |
| 628 value: \fI2\fR. Valid values: >= 0. Neighborhood radius of zero corresponds to just the list | |
| 629 of non-hydrogen atoms. | |
| 630 .Sp | |
| 631 Default value of \fI2\fR for atomic neighborhood radius generates extended connectivity | |
| 632 fingerprints corresponding to path length or diameter value of \fI4\fR [ Ref 52b ]. | |
| 633 .IP "\fB\-\-OutDelim\fR \fIcomma | tab | semicolon\fR" 4 | |
| 634 .IX Item "--OutDelim comma | tab | semicolon" | |
| 635 Delimiter for output \s-1CSV/TSV\s0 text file(s). Possible values: \fIcomma, tab, or semicolon\fR. | |
| 636 Default value: \fIcomma\fR. | |
| 637 .IP "\fB\-\-output\fR \fI\s-1SD\s0 | \s-1FP\s0 | text | all\fR" 4 | |
| 638 .IX Item "--output SD | FP | text | all" | |
| 639 Type of output files to generate. Possible values: \fI\s-1SD\s0, \s-1FP\s0, text, or all\fR. Default value: \fItext\fR. | |
| 640 .IP "\fB\-o, \-\-overwrite\fR" 4 | |
| 641 .IX Item "-o, --overwrite" | |
| 642 Overwrite existing files. | |
| 643 .IP "\fB\-q, \-\-quote\fR \fIYes | No\fR" 4 | |
| 644 .IX Item "-q, --quote Yes | No" | |
| 645 Put quotes around column values in output \s-1CSV/TSV\s0 text file(s). Possible values: | |
| 646 \&\fIYes or No\fR. Default value: \fIYes\fR. | |
| 647 .IP "\fB\-r, \-\-root\fR \fIRootName\fR" 4 | |
| 648 .IX Item "-r, --root RootName" | |
| 649 New file name is generated using the root: <Root>.<Ext>. Default for new file names: | |
| 650 <SDFileName><ExtendedConnectivityFP>.<Ext>. The file type determines <Ext> | |
| 651 value. The sdf, fpf, csv, and tsv <Ext> values are used for \s-1SD\s0, \s-1FP\s0, comma/semicolon, and tab | |
| 652 delimited text files, respectively. This option is ignored for multiple input files. | |
| 653 .IP "\fB\-s, \-\-size\fR \fInumber\fR" 4 | |
| 654 .IX Item "-s, --size number" | |
| 655 Size of bit-vector to use during generation of fingerprints bit-vector string for | |
| 656 \&\fIExtendedConnectivityBits\fR value of \fB\-m, \-\-mode\fR. Default value: \fI1024\fR. | |
| 657 Valid values correspond to any positive integer which satisfies the following criteria: | |
| 658 power of 2, >= 32 and <= 2 ** 32. | |
| 659 .Sp | |
| 660 Examples: | |
| 661 .Sp | |
| 662 .Vb 3 | |
| 663 \& 512 | |
| 664 \& 1024 | |
| 665 \& 2048 | |
| 666 .Ve | |
| 667 .IP "\fB\-\-UsePerlCoreRandom\fR \fIYes | No\fR" 4 | |
| 668 .IX Item "--UsePerlCoreRandom Yes | No" | |
| 669 Specify whether to use Perl CORE::rand or MayaChemTools MathUtil::random function | |
| 670 during random number generation for setting bits in fingerprints bit-vector strings. Possible | |
| 671 values: \fIYes or No\fR. Default value: \fIYes\fR. | |
| 672 .Sp | |
| 673 \&\fINo\fR value option for \fB\-\-UsePerlCoreRandom\fR allows the generation of fingerprints | |
| 674 bit-vector strings which are the same across different platforms. | |
| 675 .Sp | |
| 676 The random number generator implemented in MayaChemTools is a variant of | |
| 677 linear congruential generator (\s-1LCG\s0) as described by Miller et al. [ Ref 120 ]. | |
| 678 It is also referred to as Lehmer random number generator or Park-Miller | |
| 679 random number generator. | |
| 680 .Sp | |
| 681 Unlike Perl's core random number generator function rand, the random number | |
| 682 generator implemented in MayaChemTools, MathUtil::random, generates consistent | |
| 683 random values across different platforms for a specific random seed and leads | |
| 684 to generation of portable fingerprints bit-vector strings. | |
| 685 .IP "\fB\-v, \-\-VectorStringFormat\fR \fIValuesString | IDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString\fR" 4 | |
| 686 .IX Item "-v, --VectorStringFormat ValuesString | IDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString" | |
| 687 Format of fingerprints vector string data in output \s-1SD\s0, \s-1FP\s0 or \s-1CSV/TSV\s0 text file(s) specified by | |
| 688 \&\fB\-\-output\fR used during <ExtendedConnectivityCount> value of \fB\-m, \-\-mode\fR option. Possible | |
| 689 values: \fIValuesString | IDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | | |
| 690 ValuesAndIDsPairsString\fR. | |
| 691 .Sp | |
| 692 Default value during <ExtendedConnectivityCount> value of \fB\-m, \-\-mode\fR option: | |
| 693 \&\fIIDsAndValuesString\fR. | |
| 694 .Sp | |
| 695 Default value during <ExtendedConnectivity> value of \fB\-m, \-\-mode\fR option: \fIValuesString\fR. | |
| 696 .Sp | |
| 697 Examples: | |
| 698 .Sp | |
| 699 .Vb 6 | |
| 700 \& FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radi | |
| 701 \& us2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391 | |
| 702 \& 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414 | |
| 703 \& 08799 49532520 64643108 79385615 96062769 273726379 564565671 85514103 | |
| 704 \& 5 906706094 988546669 1018231313 1032696425 1197507444 1331250018 1338 | |
| 705 \& 532734 1455473691 1607485225 1609687129 1631614296 1670251330 17303... | |
| 706 \& | |
| 707 \& FingerprintsVector;ExtendedConnectivityCount:AtomicInvariantsAtomTypes | |
| 708 \& :Radius2;60;NumericalValues;IDsAndValuesString;73555770 333564680 3524 | |
| 709 \& 13391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 | |
| 710 \& 2141408799 49532520 64643108 79385615 96062769 273726379 564565671...; | |
| 711 \& 3 2 1 1 14 1 2 10 4 3 1 1 1 1 2 1 2 1 1 1 2 3 1 1 2 1 3 3 8 2 2 2 6 2 | |
| 712 \& 1 2 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1 | |
| 713 .Ve | |
| 714 .IP "\fB\-w, \-\-WorkingDir\fR \fIDirName\fR" 4 | |
| 715 .IX Item "-w, --WorkingDir DirName" | |
| 716 Location of working directory. Default: current directory. | |
| 717 .SH "EXAMPLES" | |
| 718 .IX Header "EXAMPLES" | |
| 719 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
| 720 2 using atomic invariants atom types in vector string format and create a SampleECAIFP.csv | |
| 721 file containing sequential compound IDs along with fingerprints vector strings data, type: | |
| 722 .PP | |
| 723 .Vb 1 | |
| 724 \& % ExtendedConnectivityFingerprints.pl \-r SampleECAIFP \-o Sample.sdf | |
| 725 .Ve | |
| 726 .PP | |
| 727 To generate extended connectivity count fingerprints corresponding to neighborhood radius up to | |
| 728 2 using atomic invariants atom types in vector string format and create a SampleECAIFP.csv | |
| 729 file containing sequential compound IDs along with fingerprints vector strings data, type: | |
| 730 .PP | |
| 731 .Vb 2 | |
| 732 \& % ExtendedConnectivityFingerprints.pl \-m ExtendedConnectivityCount | |
| 733 \& \-r SampleECAIFP \-o Sample.sdf | |
| 734 .Ve | |
| 735 .PP | |
| 736 To generate extended connectivity bits fingerprints as hexadecimal bit-string corresponding to | |
| 737 neighborhood radius up to 2 using atomic invariants atom types in vector string format and | |
| 738 create a SampleECAIFP.csv file containing sequential compound IDs along with fingerprints | |
| 739 vector strings data, type: | |
| 740 .PP | |
| 741 .Vb 2 | |
| 742 \& % ExtendedConnectivityFingerprints.pl \-m ExtendedConnectivityBits | |
| 743 \& \-r SampleECAIFP \-o Sample.sdf | |
| 744 .Ve | |
| 745 .PP | |
| 746 To generate extended connectivity bits fingerprints as binary bit-string corresponding to | |
| 747 neighborhood radius up to 2 using atomic invariants atom types in vector string format and | |
| 748 create a SampleECAIFP.csv file containing sequential compound IDs along with fingerprints | |
| 749 vector strings data, type: | |
| 750 .PP | |
| 751 .Vb 2 | |
| 752 \& % ExtendedConnectivityFingerprints.pl \-m ExtendedConnectivityBits | |
| 753 \& \-\-BitStringFormat BinaryString \-r SampleECAIFP \-o Sample.sdf | |
| 754 .Ve | |
| 755 .PP | |
| 756 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
| 757 2 using atomic invariants atom types in vector string format and create SampleECAIFP.sdf, SampleECAIFP.fpf | |
| 758 and SampleECAIFP.csv files containing sequential compound IDs in \s-1CSV\s0 file along with fingerprints | |
| 759 vector strings data, type: | |
| 760 .PP | |
| 761 .Vb 2 | |
| 762 \& % ExtendedConnectivityFingerprints.pl \-\-output all \-r SampleECAIFP | |
| 763 \& \-o Sample.sdf | |
| 764 .Ve | |
| 765 .PP | |
| 766 To generate extended connectivity count fingerprints corresponding to neighborhood radius up to | |
| 767 2 using atomic invariants atom types in vector string format and create SampleECAIFP.sdf, SampleECAIFP.fpf | |
| 768 and SampleECAIFP.csv files containing sequential compound IDs in \s-1CSV\s0 file along with fingerprints | |
| 769 vector strings data, type: | |
| 770 .PP | |
| 771 .Vb 2 | |
| 772 \& % ExtendedConnectivityFingerprints.pl \-m ExtendedConnectivityCount | |
| 773 \& \-\-output all \-r SampleECAIFP \-o Sample.sdf | |
| 774 .Ve | |
| 775 .PP | |
| 776 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
| 777 2 using functional class atom types in vector string format and create a SampleECFCFP.csv file | |
| 778 containing sequential compound IDs along with fingerprints vector strings data, type: | |
| 779 .PP | |
| 780 .Vb 2 | |
| 781 \& % ExtendedConnectivityFingerprints.pl \-a FunctionalClassAtomTypes | |
| 782 \& \-r SampleECFCFP \-o Sample.sdf | |
| 783 .Ve | |
| 784 .PP | |
| 785 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
| 786 2 using \s-1DREIDING\s0 atom types in vector string format and create a SampleECFP.csv file | |
| 787 containing sequential compound IDs along with fingerprints vector strings data, type: | |
| 788 .PP | |
| 789 .Vb 2 | |
| 790 \& % ExtendedConnectivityFingerprints.pl \-a DREIDINGAtomTypes | |
| 791 \& \-r SampleECFP \-o Sample.sdf | |
| 792 .Ve | |
| 793 .PP | |
| 794 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
| 795 2 using E\-state atom types in vector string format and create a SampleECFP.csv file | |
| 796 containing sequential compound IDs along with fingerprints vector strings data, type: | |
| 797 .PP | |
| 798 .Vb 2 | |
| 799 \& % ExtendedConnectivityFingerprints.pl \-a EStateAtomTypes | |
| 800 \& \-r SampleECFP \-o Sample.sdf | |
| 801 .Ve | |
| 802 .PP | |
| 803 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
| 804 2 using \s-1MMFF94\s0 atom types in vector string format and create a SampleECFP.csv file | |
| 805 containing sequential compound IDs along with fingerprints vector strings data, type: | |
| 806 .PP | |
| 807 .Vb 2 | |
| 808 \& % ExtendedConnectivityFingerprints.pl \-a MMFF94AtomTypes | |
| 809 \& \-r SampleECFP \-o Sample.sdf | |
| 810 .Ve | |
| 811 .PP | |
| 812 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
| 813 2 using SLogP atom types in vector string format and create a SampleECFP.csv file | |
| 814 containing sequential compound IDs along with fingerprints vector strings data, type: | |
| 815 .PP | |
| 816 .Vb 2 | |
| 817 \& % ExtendedConnectivityFingerprints.pl \-a SLogPAtomTypes | |
| 818 \& \-r SampleECFP \-o Sample.sdf | |
| 819 .Ve | |
| 820 .PP | |
| 821 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
| 822 2 using \s-1SYBYL\s0 atom types in vector string format and create a SampleECFP.csv file | |
| 823 containing sequential compound IDs along with fingerprints vector strings data, type: | |
| 824 .PP | |
| 825 .Vb 2 | |
| 826 \& % ExtendedConnectivityFingerprints.pl \-a SYBYLAtomTypes | |
| 827 \& \-r SampleECFP \-o Sample.sdf | |
| 828 .Ve | |
| 829 .PP | |
| 830 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
| 831 2 using \s-1TPSA\s0 atom types in vector string format and create a SampleECFP.csv file | |
| 832 containing sequential compound IDs along with fingerprints vector strings data, type: | |
| 833 .PP | |
| 834 .Vb 2 | |
| 835 \& % ExtendedConnectivityFingerprints.pl \-a TPSAAtomTypes | |
| 836 \& \-r SampleECFP \-o Sample.sdf | |
| 837 .Ve | |
| 838 .PP | |
| 839 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
| 840 2 using \s-1UFF\s0 atom types in vector string format and create a SampleECFP.csv file | |
| 841 containing sequential compound IDs along with fingerprints vector strings data, type: | |
| 842 .PP | |
| 843 .Vb 2 | |
| 844 \& % ExtendedConnectivityFingerprints.pl \-a UFFAtomTypes | |
| 845 \& \-r SampleECFP \-o Sample.sdf | |
| 846 .Ve | |
| 847 .PP | |
| 848 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
| 849 3 using atomic invariants atom types in vector string format and create a SampleECAIFP.csv | |
| 850 file containing sequential compound IDs along with fingerprints vector strings data, type: | |
| 851 .PP | |
| 852 .Vb 2 | |
| 853 \& % ExtendedConnectivityFingerprints.pl \-a AtomicInvariantsAtomTypes \-n 3 | |
| 854 \& \-r SampleECAIFP \-o Sample.sdf | |
| 855 .Ve | |
| 856 .PP | |
| 857 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
| 858 3 using functional class atom types in vector string format and create a SampleECFCFP.csv file | |
| 859 containing sequential compound IDs along with fingerprints vector strings data, type: | |
| 860 .PP | |
| 861 .Vb 2 | |
| 862 \& % ExtendedConnectivityFingerprints.pl \-a FunctionalClassAtomTypes \-n 3 | |
| 863 \& \-r SampleECFCFP \-o Sample.sdf | |
| 864 .Ve | |
| 865 .PP | |
| 866 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
| 867 2 using only \s-1AS\s0,X atomic invariants atom types in vector string format and create a | |
| 868 SampleECAIFP.csv file containing sequential compound IDs along with fingerprints vector | |
| 869 strings data, type: | |
| 870 .PP | |
| 871 .Vb 2 | |
| 872 \& % ExtendedConnectivityFingerprints.pl \-a AtomicInvariantsAtomTypes | |
| 873 \& \-\-AtomicInvariantsToUse "AS,X" \-r SampleECAIFP \-o Sample.sdf | |
| 874 .Ve | |
| 875 .PP | |
| 876 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
| 877 2 using only \s-1HBD\s0,HBA functional class atom types in vector string format and create a | |
| 878 SampleECFCFP.csv file containing sequential compound IDs along with fingerprints vector | |
| 879 strings data, type: | |
| 880 .PP | |
| 881 .Vb 2 | |
| 882 \& % ExtendedConnectivityFingerprints.pl \-a FunctionalClassAtomTypes | |
| 883 \& \-\-FunctionalClassesToUse "HBD,HBA" \-r SampleECFCFP \-o Sample.sdf | |
| 884 .Ve | |
| 885 .PP | |
| 886 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
| 887 2 using atomic invariants atom types in vector string format and create a SampleECAIFP.csv | |
| 888 file containing compound \s-1ID\s0 from molecule name line along with fingerprints vector strings | |
| 889 data, type: | |
| 890 .PP | |
| 891 .Vb 3 | |
| 892 \& % ExtendedConnectivityFingerprints.pl \-a AtomicInvariantsAtomTypes | |
| 893 \& \-\-DataFieldsMode CompoundID \-\-CompoundIDMode MolName | |
| 894 \& \-r SampleECAIFP \-o Sample.sdf | |
| 895 .Ve | |
| 896 .PP | |
| 897 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
| 898 2 using functional class atom types in vector string format and create a SampleECFCFP.csv | |
| 899 file containing compound IDs using specified data field along with fingerprints vector strings | |
| 900 data, type: | |
| 901 .PP | |
| 902 .Vb 3 | |
| 903 \& % ExtendedConnectivityFingerprints.pl \-a FunctionalClassAtomTypes | |
| 904 \& \-\-DataFieldsMode CompoundID \-\-CompoundIDMode DataField \-\-CompoundID Mol_ID | |
| 905 \& \-r SampleECFCFP \-o Sample.sdf | |
| 906 .Ve | |
| 907 .PP | |
| 908 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
| 909 2 using atomic invariants atom types in vector string format and create a SampleECAIFP.csv | |
| 910 file containing compound \s-1ID\s0 using combination of molecule name line and an explicit compound | |
| 911 prefix along with fingerprints vector strings data, type: | |
| 912 .PP | |
| 913 .Vb 3 | |
| 914 \& % ExtendedConnectivityFingerprints.pl \-a AtomicInvariantsAtomTypes | |
| 915 \& \-\-DataFieldsMode CompoundID \-\-CompoundIDMode MolNameOrLabelPrefix | |
| 916 \& \-\-CompoundID Cmpd \-\-CompoundIDLabel MolID \-r SampleECAIFP \-o Sample.sdf | |
| 917 .Ve | |
| 918 .PP | |
| 919 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
| 920 2 using functional class atom types in vector string format and create a SampleECFCFP.csv | |
| 921 file containing specific data fields columns along with fingerprints vector strings | |
| 922 data, type: | |
| 923 .PP | |
| 924 .Vb 3 | |
| 925 \& % ExtendedConnectivityFingerprints.pl \-a FunctionalClassAtomTypes | |
| 926 \& \-\-DataFieldsMode Specify \-\-DataFields Mol_ID \-r SampleECFCFP | |
| 927 \& \-o Sample.sdf | |
| 928 .Ve | |
| 929 .PP | |
| 930 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
| 931 2 using atomic invariants atom types in vector string format and create a SampleECAIFP.csv | |
| 932 file containing common data fields columns along with fingerprints vector strings data, type: | |
| 933 .PP | |
| 934 .Vb 2 | |
| 935 \& % ExtendedConnectivityFingerprints.pl \-a AtomicInvariantsAtomTypes | |
| 936 \& \-\-DataFieldsMode Common \-r SampleECAIFP \-o Sample.sdf | |
| 937 .Ve | |
| 938 .PP | |
| 939 To generate extended connectivity fingerprints corresponding to neighborhood radius up to | |
| 940 2 using functional class atom types in vector string format and create SampleECFCFP.sdf, SampleECFCFP.fpf | |
| 941 and SampleECFCFP.csv files containing all data fields columns in \s-1CSV\s0 file along with fingerprints | |
| 942 vector strings data, type: | |
| 943 .PP | |
| 944 .Vb 3 | |
| 945 \& % ExtendedConnectivityFingerprints.pl \-a FunctionalClassAtomTypes | |
| 946 \& \-\-DataFieldsMode All \-\-output all \-r SampleECFCFP | |
| 947 \& \-o Sample.sdf | |
| 948 .Ve | |
| 949 .SH "AUTHOR" | |
| 950 .IX Header "AUTHOR" | |
| 951 Manish Sud <msud@san.rr.com> | |
| 952 .SH "SEE ALSO" | |
| 953 .IX Header "SEE ALSO" | |
| 954 InfoFingerprintsFiles.pl, SimilarityMatricesFingerprints.pl, AtomNeighborhoodsFingerprints.pl, | |
| 955 MACCSKeysFingerprints.pl, PathLengthFingerprints.pl, | |
| 956 TopologicalAtomPairsFingerprints.pl, TopologicalAtomTorsionsFingerprints.pl, | |
| 957 TopologicalPharmacophoreAtomPairsFingerprints.pl, TopologicalPharmacophoreAtomTripletsFingerprints.pl | |
| 958 .SH "COPYRIGHT" | |
| 959 .IX Header "COPYRIGHT" | |
| 960 Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 961 .PP | |
| 962 This file is part of MayaChemTools. | |
| 963 .PP | |
| 964 MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 965 the terms of the \s-1GNU\s0 Lesser General Public License as published by the Free | |
| 966 Software Foundation; either version 3 of the License, or (at your option) | |
| 967 any later version. |
