annotate mayachemtools/bin/MACCSKeysFingerprints.pl @ 9:ab29fa5c8c1f draft default tip

Uploaded
author deepakjadmin
date Thu, 15 Dec 2016 14:18:03 -0500
parents 73ae111cf86f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1 #!/usr/bin/perl -w
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
2 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
3 # $RCSfile: MACCSKeysFingerprints.pl,v $
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
4 # $Date: 2015/02/28 20:46:20 $
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
5 # $Revision: 1.31 $
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
6 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
7 # Author: Manish Sud <msud@san.rr.com>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
8 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
10 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
11 # This file is part of MayaChemTools.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
12 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
14 # the terms of the GNU Lesser General Public License as published by the Free
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
15 # Software Foundation; either version 3 of the License, or (at your option) any
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
16 # later version.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
17 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
18 # MayaChemTools is distributed in the hope that it will be useful, but without
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
19 # any warranty; without even the implied warranty of merchantability of fitness
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
20 # for a particular purpose. See the GNU Lesser General Public License for more
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
21 # details.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
22 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
23 # You should have received a copy of the GNU Lesser General Public License
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
26 # Boston, MA, 02111-1307, USA.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
27 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
28
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
29 use strict;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
30 use FindBin; use lib "$FindBin::Bin/../lib";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
31 use Getopt::Long;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
32 use File::Basename;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
33 use Text::ParseWords;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
34 use Benchmark;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
35 use FileUtil;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
36 use TextUtil;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
37 use SDFileUtil;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
38 use MoleculeFileIO;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
39 use FileIO::FingerprintsSDFileIO;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
40 use FileIO::FingerprintsTextFileIO;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
41 use FileIO::FingerprintsFPFileIO;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
42 use Fingerprints::MACCSKeys;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
43
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# File-scoped state. These lexicals are declared at file level because the
# subroutines defined later in this file read them directly.
my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename($0);
print "\n$ScriptName: Starting...\n\n";

# Arrow-style constructor call; the indirect object form ("new Benchmark") is
# parsed heuristically and is discouraged by perlobj.
$StartTime = Benchmark->new;

# Get the options and setup script...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  # Usage text is emitted through die, so the script stops here.
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

my(@SDFilesList);
@SDFilesList = ExpandFileNames(\@ARGV, "sdf sd");

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Setup information about input files...
print "Checking input SD file(s)...\n";
my(%SDFilesInfo);
RetrieveSDFilesInfo();

# Process input files: only files flagged as okay in %SDFilesInfo are handled.
my($FileIndex);
if (@SDFilesList > 1) {
  print "\nProcessing SD files...\n";
}
for $FileIndex (0 .. $#SDFilesList) {
  if ($SDFilesInfo{FileOkay}[$FileIndex]) {
    print "\nProcessing file $SDFilesList[$FileIndex]...\n";
    GenerateMACCSKeysFingerprints($FileIndex);
  }
}
print "\n$ScriptName:Done...\n\n";

# Report elapsed time for the whole run.
$EndTime = Benchmark->new;
$TotalTime = timediff($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
89
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
90 ###############################################################################
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
91
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Generate fingerprints for a SD file...
#
# Parameters:
#   $FileIndex - index into @SDFilesList of the SD file to process.
#
# Opens the output files requested for this input file, reads molecules one at
# a time, optionally filters them (when $OptionsInfo{Filter} is set), generates
# fingerprints for each remaining molecule and writes them out. Compounds that
# are filtered or for which no fingerprints object is returned are counted as
# ignored. A summary of the counts is printed at the end.
#
sub GenerateMACCSKeysFingerprints {
  my($FileIndex) = @_;
  my($CmpdCount, $IgnoredCmpdCount, $SDFile, $MoleculeFileIO, $Molecule, $MACCSKeysFingerprints, $NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO);

  $SDFile = $SDFilesList[$FileIndex];

  # Setup output files; any of the three handles may be undef when that
  # output type was not requested.
  ($NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO) = SetupAndOpenOutputFiles($FileIndex);

  # Arrow-style constructor call instead of indirect object syntax
  # ("new MoleculeFileIO(...)"), which perlobj discourages.
  $MoleculeFileIO = MoleculeFileIO->new('Name' => $SDFile);
  $MoleculeFileIO->Open();

  $CmpdCount = 0;
  $IgnoredCmpdCount = 0;

  COMPOUND: while ($Molecule = $MoleculeFileIO->ReadMolecule()) {
    $CmpdCount++;

    # Filter compound data before calculating fingerprints...
    if ($OptionsInfo{Filter}) {
      if (CheckAndFilterCompound($CmpdCount, $Molecule)) {
        $IgnoredCmpdCount++;
        next COMPOUND;
      }
    }

    $MACCSKeysFingerprints = GenerateMoleculeFingerprints($Molecule);
    if (!$MACCSKeysFingerprints) {
      # A false return value is treated as a generation failure.
      $IgnoredCmpdCount++;
      ProcessIgnoredCompound('FingerprintsGenerationFailed', $CmpdCount, $Molecule);
      next COMPOUND;
    }

    WriteDataToOutputFiles($FileIndex, $CmpdCount, $Molecule, $MACCSKeysFingerprints, $NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO);
  }
  $MoleculeFileIO->Close();

  # Close whichever output files were opened.
  if ($NewFPSDFileIO) {
    $NewFPSDFileIO->Close();
  }
  if ($NewFPTextFileIO) {
    $NewFPTextFileIO->Close();
  }
  if ($NewFPFileIO) {
    $NewFPFileIO->Close();
  }

  WriteFingerprintsGenerationSummaryStatistics($CmpdCount, $IgnoredCmpdCount);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
144
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Process compound being ignored due to problems in fingerprints generation...
#
# Parameters:
#   $Mode      - reason for ignoring the compound; matched case-insensitively
#                against ContainsNonElementalData, ContainsNoElementalData and
#                FingerprintsGenerationFailed.
#   $CmpdCount - record number of the compound in the input file.
#   $Molecule  - molecule object for the compound.
#
# Emits a warning naming the compound; an unrecognized mode gets the same
# message as FingerprintsGenerationFailed.
#
sub ProcessIgnoredCompound {
  my($Mode, $CmpdCount, $Molecule) = @_;

  # Compound ID used in the warning text.
  my $FieldValuesRef = $Molecule->GetDataFieldLabelAndValues();
  my $CmpdID = SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $FieldValuesRef);

  if ($Mode =~ /^ContainsNonElementalData$/i) {
    warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Compound contains atom data corresponding to non-elemental atom symbol(s)...\n\n";
  }
  elsif ($Mode =~ /^ContainsNoElementalData$/i) {
    warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Compound contains no atom data...\n\n";
  }
  elsif ($Mode =~ /^FingerprintsGenerationFailed$/i) {
    warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Fingerprints generation didn't succeed...\n\n";
  }
  else {
    # Fallback for any other mode value.
    warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Fingerprints generation didn't succeed...\n\n";
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
172
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Check and filter compounds....
#
# Parameters:
#   $CmpdCount - record number of the compound in the input file.
#   $Molecule  - molecule object to inspect.
#
# Returns 1 when the compound is to be ignored (it has a non-zero non-element
# count, or a zero element count) after reporting it through
# ProcessIgnoredCompound; returns 0 otherwise.
#
sub CheckAndFilterCompound {
  my($CmpdCount, $Molecule) = @_;

  my($NumElements, $NumNonElements) = $Molecule->GetNumOfElementsAndNonElements();

  # Non-elemental data takes precedence over a missing-atoms report.
  my $IgnoreReason = $NumNonElements ? 'ContainsNonElementalData'
                   : !$NumElements   ? 'ContainsNoElementalData'
                   :                   '';

  return 0 unless $IgnoreReason;

  ProcessIgnoredCompound($IgnoreReason, $CmpdCount, $Molecule);
  return 1;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
193
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Write out compounds fingerprints generation summary statistics...
#
# Parameters:
#   $CmpdCount        - total number of compounds read.
#   $IgnoredCmpdCount - number of those compounds that were ignored.
#
# Prints the total, the processed count (total minus ignored) and the ignored
# count to the currently selected output handle.
#
sub WriteFingerprintsGenerationSummaryStatistics {
  my($CmpdCount, $IgnoredCmpdCount) = @_;

  my $NumProcessed = $CmpdCount - $IgnoredCmpdCount;

  print "\nNumber of compounds: $CmpdCount\n"
    . "Number of compounds processed successfully during fingerprints generation: $NumProcessed\n"
    . "Number of compounds ignored during fingerprints generation: $IgnoredCmpdCount\n";
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
206
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Open output files...
#
# Parameters:
#   $FileIndex - index of the input SD file whose output files are opened.
#
# Returns the list ($NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO). Each
# element is an opened fingerprints file IO object, or undef when the
# corresponding output type ($OptionsInfo{SDOutput}, {TextOutput}, {FPOutput})
# is not enabled.
#
sub SetupAndOpenOutputFiles {
  my($FileIndex) = @_;
  my($NewFPSDFile, $NewFPFile, $NewFPTextFile, $NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO, %FingerprintsFileIOParams);

  ($NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO) = (undef) x 3;

  # Setup common parameters for fingerprints file IO objects; they differ
  # between bit-vector (MACCSKeyBits) and count-vector (MACCSKeyCount) modes.
  %FingerprintsFileIOParams = ();
  if ($OptionsInfo{Mode} =~ /^MACCSKeyBits$/i) {
    %FingerprintsFileIOParams = ('Mode' => 'Write', 'Overwrite' => $OptionsInfo{OverwriteFiles}, 'FingerprintsStringMode' => 'FingerprintsBitVectorString', 'BitStringFormat' => $OptionsInfo{BitStringFormat}, 'BitsOrder' => $OptionsInfo{BitsOrder});
  }
  elsif ($OptionsInfo{Mode} =~ /^MACCSKeyCount$/i) {
    %FingerprintsFileIOParams = ('Mode' => 'Write', 'Overwrite' => $OptionsInfo{OverwriteFiles}, 'FingerprintsStringMode' => 'FingerprintsVectorString', 'VectorStringFormat' => $OptionsInfo{VectorStringFormat});
  }

  # Constructors below use arrow-style calls rather than indirect object
  # syntax ("new Class(...)"), which perlobj discourages.

  if ($OptionsInfo{SDOutput}) {
    $NewFPSDFile = $SDFilesInfo{SDOutFileNames}[$FileIndex];
    print "Generating SD file $NewFPSDFile...\n";
    $NewFPSDFileIO = FileIO::FingerprintsSDFileIO->new('Name' => $NewFPSDFile, %FingerprintsFileIOParams, 'FingerprintsFieldLabel' => $OptionsInfo{FingerprintsLabel});
    $NewFPSDFileIO->Open();
  }

  if ($OptionsInfo{FPOutput}) {
    $NewFPFile = $SDFilesInfo{FPOutFileNames}[$FileIndex];
    print "Generating FP file $NewFPFile...\n";
    $NewFPFileIO = FileIO::FingerprintsFPFileIO->new('Name' => $NewFPFile, %FingerprintsFileIOParams);
    $NewFPFileIO->Open();
  }

  if ($OptionsInfo{TextOutput}) {
    my($ColLabelsRef);

    $NewFPTextFile = $SDFilesInfo{TextOutFileNames}[$FileIndex];
    $ColLabelsRef = SetupFPTextFileCoulmnLabels($FileIndex);

    print "Generating text file $NewFPTextFile...\n";
    $NewFPTextFileIO = FileIO::FingerprintsTextFileIO->new('Name' => $NewFPTextFile, %FingerprintsFileIOParams, 'DataColLabels' => $ColLabelsRef, 'OutDelim' => $OptionsInfo{OutDelim}, 'OutQuote' => $OptionsInfo{OutQuote});
    $NewFPTextFileIO->Open();
  }

  return ($NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
252
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Write fingerprints and other data to appropriate output files...
#
# Parameters:
#   $FileIndex             - index of the input SD file being processed.
#   $CmpdCount             - record number of the compound.
#   $Molecule              - molecule object for the compound.
#   $MACCSKeysFingerprints - fingerprints object to write.
#   $NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO
#                          - output file IO objects; a false value means that
#                            output type is skipped.
#
sub WriteDataToOutputFiles {
  my($FileIndex, $CmpdCount, $Molecule, $MACCSKeysFingerprints, $NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO) = @_;

  # Data field values are only needed by the text and FP writers.
  my $FieldValuesRef = undef;
  if ($NewFPTextFileIO || $NewFPFileIO) {
    $FieldValuesRef = $Molecule->GetDataFieldLabelAndValues();
  }

  # SD output: fingerprints are written along with the input molecule string.
  if ($NewFPSDFileIO) {
    my $MoleculeString = $Molecule->GetInputMoleculeString();
    $NewFPSDFileIO->WriteFingerprints($MACCSKeysFingerprints, $MoleculeString);
  }

  # Text output: fingerprints are written along with the data column values.
  if ($NewFPTextFileIO) {
    my $RowValuesRef = SetupFPTextFileCoulmnValues($FileIndex, $CmpdCount, $Molecule, $FieldValuesRef);
    $NewFPTextFileIO->WriteFingerprints($MACCSKeysFingerprints, $RowValuesRef);
  }

  # FP output: fingerprints are written along with the compound ID.
  if ($NewFPFileIO) {
    my $ID = SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $FieldValuesRef);
    $NewFPFileIO->WriteFingerprints($MACCSKeysFingerprints, $ID);
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
285
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Generate appropriate column labels for FPText output file...
#
# Parameters:
#   $FileIndex - index of the input SD file the labels are built for.
#
# Returns a reference to an array holding the data column labels selected by
# $OptionsInfo{DataFieldsMode} (All, Common, Specify or CompoundID), followed
# by the fingerprints label.
#
sub SetupFPTextFileCoulmnLabels {
  my($FileIndex) = @_;

  my $FieldsMode = $OptionsInfo{DataFieldsMode};

  my @Labels =
      $FieldsMode =~ /^All$/i        ? @{$SDFilesInfo{AllDataFieldsRef}[$FileIndex]}
    : $FieldsMode =~ /^Common$/i     ? @{$SDFilesInfo{CommonDataFieldsRef}[$FileIndex]}
    : $FieldsMode =~ /^Specify$/i    ? @{$OptionsInfo{SpecifiedDataFields}}
    : $FieldsMode =~ /^CompoundID$/i ? ($OptionsInfo{CompoundIDLabel})
    :                                  ();

  # Add fingerprints label...
  push @Labels, $OptionsInfo{FingerprintsLabel};

  return \@Labels;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
310
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Generate column values FPText output file..
#
# Parameters:
#   $FileIndex                  - index of the input SD file being processed.
#   $CmpdCount                  - record number of the compound.
#   $Molecule                   - molecule object for the compound.
#   $DataFieldLabelAndValuesRef - hash reference mapping data field labels to
#                                 values for this compound.
#
# Returns a reference to an array of column values chosen according to
# $OptionsInfo{DataFieldsMode}. In CompoundID mode the array holds the compound
# ID; in All, Common and Specify modes it holds one value per selected field
# label, with an empty string for labels missing from the compound's data.
#
sub SetupFPTextFileCoulmnValues {
  my($FileIndex, $CmpdCount, $Molecule, $DataFieldLabelAndValuesRef) = @_;

  my $FieldsMode = $OptionsInfo{DataFieldsMode};

  if ($FieldsMode =~ /^CompoundID$/i) {
    return [ SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef) ];
  }

  # Pick the list of field labels for the remaining modes; an unrecognized
  # mode yields no columns.
  my $FieldLabelsRef =
      $FieldsMode =~ /^All$/i     ? $SDFilesInfo{AllDataFieldsRef}[$FileIndex]
    : $FieldsMode =~ /^Common$/i  ? $SDFilesInfo{CommonDataFieldsRef}[$FileIndex]
    : $FieldsMode =~ /^Specify$/i ? $OptionsInfo{SpecifiedDataFields}
    :                               [];

  my @Values = map { exists $DataFieldLabelAndValuesRef->{$_} ? $DataFieldLabelAndValuesRef->{$_} : ''} @{$FieldLabelsRef};

  return \@Values;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
333
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Generate compound ID for FP and FPText output files..
#
# Arguments:
#   $CmpdCount                  - running compound number, appended to the label prefix
#   $Molecule                   - object providing GetName()
#   $DataFieldLabelAndValuesRef - reference to a hash of data field labels and values
#
# Returns the ID selected by $OptionsInfo{CompoundIDMode}; an empty string is
# returned when the mode matches none of the handled values.
#
sub SetupCmpdIDForOutputFiles {
  my($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef) = @_;

  my $IDMode = $OptionsInfo{CompoundIDMode};

  if ($IDMode =~ /^MolNameOrLabelPrefix$/i) {
    # A false molecule name (undef, '' or '0') falls back to prefix + count...
    my $Name = $Molecule->GetName();
    return $Name if $Name;
    return "$OptionsInfo{CompoundID}${CmpdCount}";
  }

  if ($IDMode =~ /^LabelPrefix$/i) {
    return "$OptionsInfo{CompoundID}${CmpdCount}";
  }

  if ($IDMode =~ /^DataField$/i) {
    # In this mode CompoundID holds the label of the data field to look up...
    my $FieldLabel = $OptionsInfo{CompoundID};
    if (exists $DataFieldLabelAndValuesRef->{$FieldLabel}) {
      return $DataFieldLabelAndValuesRef->{$FieldLabel};
    }
    return '';
  }

  if ($IDMode =~ /^MolName$/i) {
    # Assign first so that GetName() is always evaluated in scalar context...
    my $Name = $Molecule->GetName();
    return $Name;
  }

  return '';
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
359
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Generate fingerprints for molecule...
#
# Arguments:
#   $Molecule - molecule object; it is modified in place (largest component
#               selection when enabled, ring detection, aromaticity detection)
#
# Returns a Fingerprints::MACCSKeys object after GenerateMACCSKeys() has been
# called on it, or undef when ring detection on the molecule reports failure.
# Dies when $OptionsInfo{Mode} is neither MACCSKeyBits nor MACCSKeyCount.
#
sub GenerateMoleculeFingerprints {
  my($Molecule) = @_;

  if ($OptionsInfo{KeepLargestComponent}) {
    $Molecule->KeepLargestComponent();
  }
  if (!$Molecule->DetectRings()) {
    # Explicit undef is kept for backward compatibility with existing callers...
    return undef;
  }
  $Molecule->SetAromaticityModel($OptionsInfo{AromaticityModel});
  $Molecule->DetectAromaticity();

  # Map the case-insensitive mode onto the canonical type name...
  my $FingerprintsType;
  if ($OptionsInfo{Mode} =~ /^MACCSKeyBits$/i) {
    $FingerprintsType = 'MACCSKeyBits';
  }
  elsif ($OptionsInfo{Mode} =~ /^MACCSKeyCount$/i) {
    $FingerprintsType = 'MACCSKeyCount';
  }
  else {
    die "Error: The value specified, $OptionsInfo{Mode}, for option \"-m, --mode\" is not valid. Allowed values: MACCSKeyBits or MACCSKeyCount\n";
  }

  # Direct method call instead of indirect object syntax "new Class(...)"...
  my $MACCSKeysFingerprints = Fingerprints::MACCSKeys->new('Molecule' => $Molecule, 'Type' => $FingerprintsType, 'Size' => $OptionsInfo{Size});
  $MACCSKeysFingerprints->GenerateMACCSKeys();

  return $MACCSKeysFingerprints;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
389
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Retrieve information about SD files...
#
# Resets %SDFilesInfo and then walks @SDFilesList, recording per input file
# (by index) an okay flag, the output file root, and the SD, FP and text output
# file names. References to all and common data field labels are recorded as
# well; they are collected from the file only for text output in All or Common
# data fields mode and are empty strings otherwise.
#
# A file is skipped with a warning - leaving its FileOkay flag at 0 and its
# name slots as empty strings - when it doesn't exist, isn't a SD file, lacks
# the data field requested as compound ID, would be overwritten by its own SD
# output, or when an output file already exists and overwriting wasn't
# requested. The data field label slots of a skipped file are not assigned.
#
# Takes no arguments and returns nothing useful; dies when a SD file can't be opened.
#
sub RetrieveSDFilesInfo {
  %SDFilesInfo = ();
  for my $InfoKey (qw(FileOkay OutFileRoot SDOutFileNames FPOutFileNames TextOutFileNames AllDataFieldsRef CommonDataFieldsRef)) {
    $SDFilesInfo{$InfoKey} = [];
  }

  my $CheckDataField = ($OptionsInfo{TextOutput} && ($OptionsInfo{DataFieldsMode} =~ /^CompoundID$/i) && ($OptionsInfo{CompoundIDMode} =~ /^DataField$/i)) ? 1 : 0;
  my $CollectDataFields = ($OptionsInfo{TextOutput} && ($OptionsInfo{DataFieldsMode} =~ /^(All|Common)$/i)) ? 1 : 0;

  FILELIST: for my $Index (0 .. $#SDFilesList) {
    my $SDFile = $SDFilesList[$Index];

    # Assume the worst until all checks have passed...
    $SDFilesInfo{FileOkay}[$Index] = 0;
    $SDFilesInfo{OutFileRoot}[$Index] = '';
    $SDFilesInfo{SDOutFileNames}[$Index] = '';
    $SDFilesInfo{FPOutFileNames}[$Index] = '';
    $SDFilesInfo{TextOutFileNames}[$Index] = '';

    if (!(-e $SDFile)) {
      warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
      next FILELIST;
    }
    if (!CheckFileType($SDFile, "sd sdf")) {
      warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
      next FILELIST;
    }

    if ($CheckDataField) {
      # Make sure data field exists in SD file; only the first compound is examined...
      # Three-argument open: the file name is never interpreted as a mode or a command.
      open my $SDFileHandle, '<', $SDFile or die "Error: Couldn't open $SDFile: $! \n";
      my $CmpdString = ReadCmpdString($SDFileHandle);
      close $SDFileHandle;

      my @CmpdLines = split "\n", $CmpdString;
      my %DataFieldValues = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
      my $SpecifiedDataField = $OptionsInfo{CompoundID};
      if (!exists $DataFieldValues{$SpecifiedDataField}) {
        warn "Warning: Ignoring file $SDFile: Data field value, $SpecifiedDataField, using \"--CompoundID\" option in \"DataField\" \"--CompoundIDMode\" doesn't exist\n";
        next FILELIST;
      }
    }

    my $AllDataFieldsRef = '';
    my $CommonDataFieldsRef = '';
    if ($CollectDataFields) {
      my($CmpdCount);
      open my $SDFileHandle, '<', $SDFile or die "Error: Couldn't open $SDFile: $! \n";
      ($CmpdCount, $AllDataFieldsRef, $CommonDataFieldsRef) = GetAllAndCommonCmpdDataHeaderLabels($SDFileHandle);
      close $SDFileHandle;
    }

    # Setup output file names...
    my($FileDir, $FileName, $FileExt) = ParseFileName($SDFile);

    my $TextOutFileExt = ($Options{outdelim} =~ /^tab$/i) ? "tsv" : "csv";
    my $SDOutFileExt = $FileExt;
    my $FPOutFileExt = "fpf";

    my $OutFileRoot;
    if ($OptionsInfo{OutFileRoot} && (@SDFilesList == 1)) {
      # A user supplied root is honored only for a single input file; any
      # extension on it is dropped...
      my($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
      $OutFileRoot = ($RootFileName && $RootFileExt) ? $RootFileName : $OptionsInfo{OutFileRoot};
    }
    else {
      $OutFileRoot = "${FileName}MACCSKeysFP";
    }

    my $NewSDFileName = "${OutFileRoot}.${SDOutFileExt}";
    my $NewFPFileName = "${OutFileRoot}.${FPOutFileExt}";
    my $NewTextFileName = "${OutFileRoot}.${TextOutFileExt}";

    if ($OptionsInfo{SDOutput}) {
      # \Q...\E: match the output file name literally instead of as a pattern, so
      # "." and other metacharacters in file names can't mismatch or fail to compile.
      if ($SDFile =~ /\Q$NewSDFileName\E/i) {
        warn "Warning: Ignoring input file $SDFile: Same output, $NewSDFileName, and input file names.\n";
        print "Specify a different name using \"-r --root\" option or use default name.\n";
        next FILELIST;
      }
    }

    if (!$OptionsInfo{OverwriteFiles}) {
      # Check SD, FP and text output files...
      if ($OptionsInfo{SDOutput}) {
        if (-e $NewSDFileName) {
          warn "Warning: Ignoring file $SDFile: The file $NewSDFileName already exists\n";
          next FILELIST;
        }
      }
      if ($OptionsInfo{FPOutput}) {
        if (-e $NewFPFileName) {
          warn "Warning: Ignoring file $SDFile: The file $NewFPFileName already exists\n";
          next FILELIST;
        }
      }
      if ($OptionsInfo{TextOutput}) {
        if (-e $NewTextFileName) {
          warn "Warning: Ignoring file $SDFile: The file $NewTextFileName already exists\n";
          next FILELIST;
        }
      }
    }

    $SDFilesInfo{FileOkay}[$Index] = 1;

    $SDFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
    $SDFilesInfo{SDOutFileNames}[$Index] = $NewSDFileName;
    $SDFilesInfo{FPOutFileNames}[$Index] = $NewFPFileName;
    $SDFilesInfo{TextOutFileNames}[$Index] = $NewTextFileName;

    $SDFilesInfo{AllDataFieldsRef}[$Index] = $AllDataFieldsRef;
    $SDFilesInfo{CommonDataFieldsRef}[$Index] = $CommonDataFieldsRef;
  }
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
522
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Process option values...
#
# Rebuilds %OptionsInfo from the values held in %Options: plain copies, Yes/No
# values converted to 1/0 flags, output type flags and defaulted values.
#
# Takes no arguments. Dies when a value required by the selected mode is missing:
# "--CompoundID" for DataField compound ID mode, "--DataFields" for Specify data
# fields mode.
#
sub ProcessOptions {
  %OptionsInfo = ();

  # Values taken over as specified...
  my @DirectCopies = (
    [Mode => 'mode'],
    [AromaticityModel => 'aromaticitymodel'],
    [BitsOrder => 'bitsorder'],
    [BitStringFormat => 'bitstringformat'],
    [CompoundIDMode => 'compoundidmode'],
    [CompoundIDLabel => 'compoundidlabel'],
    [DataFieldsMode => 'datafieldsmode'],
  );
  for my $Pair (@DirectCopies) {
    my($InfoKey, $OptionKey) = @{$Pair};
    $OptionsInfo{$InfoKey} = $Options{$OptionKey};
  }

  $OptionsInfo{Filter} = ($Options{filter} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{SpecifiedDataFields} = [];
  $OptionsInfo{CompoundID} = '';

  # NOTE(review): CompoundID is filled in only for CompoundID data fields mode;
  # for other data fields modes it stays empty - confirm that this is intended.
  my $DataFieldsMode = $Options{datafieldsmode};
  my $CompoundIDMode = $Options{compoundidmode};

  if ($DataFieldsMode =~ /^CompoundID$/i) {
    if ($CompoundIDMode =~ /^DataField$/i) {
      $Options{compoundid}
        or die "Error: You must specify a value for \"--CompoundID\" option in \"DataField\" \"--CompoundIDMode\". \n";
      $OptionsInfo{CompoundID} = $Options{compoundid};
    }
    elsif ($CompoundIDMode =~ /^(LabelPrefix|MolNameOrLabelPrefix)$/i) {
      $OptionsInfo{CompoundID} = $Options{compoundid} || 'Cmpd';
    }
  }
  elsif ($DataFieldsMode =~ /^Specify$/i) {
    $Options{datafields}
      or die "Error: You must specify a value for \"--DataFields\" option in \"Specify\" \"-d, --DataFieldsMode\". \n";
    push @{$OptionsInfo{SpecifiedDataFields}}, split(/\,/, $Options{datafields});
  }

  $OptionsInfo{FingerprintsLabel} = $Options{fingerprintslabel} || 'MACCSKeysFingerprints';

  $OptionsInfo{KeepLargestComponent} = ($Options{keeplargestcomponent} =~ /^Yes$/i) ? 1 : 0;

  # One flag per output type; "All" turns on every one of them...
  my $Output = $Options{output};
  $OptionsInfo{Output} = $Output;
  $OptionsInfo{SDOutput} = ($Output =~ /^(SD|All)$/i) ? 1 : 0;
  $OptionsInfo{FPOutput} = ($Output =~ /^(FP|All)$/i) ? 1 : 0;
  $OptionsInfo{TextOutput} = ($Output =~ /^(Text|All)$/i) ? 1 : 0;

  $OptionsInfo{OutDelim} = $Options{outdelim};
  $OptionsInfo{OutQuote} = ($Options{quote} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
  $OptionsInfo{OutFileRoot} = $Options{root} || 0;

  $OptionsInfo{Size} = $Options{size};

  $OptionsInfo{VectorStringFormat} = $Options{vectorstringformat};
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
583
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Setup script usage and retrieve command line arguments specified using various options...
#
# Seeds %Options with default values, lets GetOptions() overwrite them with the
# values specified on the command line, changes to the working directory when
# one is specified, and validates the option values. Any invalid value terminates
# the script with an error message naming the option and its allowed values.
#
# Takes no arguments.
#
sub SetupScriptUsage {

  # Retrieve all the options, starting out from their default values...
  %Options = (
    aromaticitymodel => 'MayaChemToolsAromaticityModel',
    bitsorder => 'Ascending',
    bitstringformat => 'BinaryString',
    compoundidmode => 'LabelPrefix',
    compoundidlabel => 'CompoundID',
    datafieldsmode => 'CompoundID',
    filter => 'Yes',
    keeplargestcomponent => 'Yes',
    mode => 'MACCSKeyBits',
    output => 'text',
    outdelim => 'comma',
    quote => 'yes',
    size => 166,
    vectorstringformat => 'ValuesString',
  );

  my @OptionSpecs = ("aromaticitymodel=s", "bitsorder=s", "bitstringformat|b=s", "compoundid=s", "compoundidlabel=s", "compoundidmode=s", "datafields=s", "datafieldsmode|d=s", "filter|f=s", "fingerprintslabel=s", "help|h", "keeplargestcomponent|k=s", "mode|m=s", "outdelim=s", "output=s", "overwrite|o", "quote|q=s", "root|r=s", "size|s=i", "vectorstringformat|v=s", "workingdir|w=s");
  GetOptions(\%Options, @OptionSpecs)
    or die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";

  if ($Options{workingdir}) {
    -d $Options{workingdir}
      or die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }

  if (!Molecule::IsSupportedAromaticityModel($Options{aromaticitymodel})) {
    my(@SupportedModels) = Molecule::GetSupportedAromaticityModels();
    die "Error: The value specified, $Options{aromaticitymodel}, for option \"--AromaticityModel\" is not valid. Supported aromaticity models in current release of MayaChemTools: @SupportedModels\n";
  }

  # Die unless the value of an option matches the pattern of its allowed values...
  my $CheckEnumeratedOption = sub {
    my($OptionKey, $AllowedPattern, $OptionLabel, $AllowedValues) = @_;
    if ($Options{$OptionKey} !~ $AllowedPattern) {
      die "Error: The value specified, $Options{$OptionKey}, for option \"$OptionLabel\" is not valid. Allowed values: $AllowedValues\n";
    }
  };

  # Option key, allowed values pattern, option label and allowed values text
  # used in the error message; checked in the order listed...
  my @EnumeratedOptions = (
    ['bitsorder', qr/^(Ascending|Descending)$/i, '--BitsOrder', 'Ascending or Descending'],
    ['bitstringformat', qr/^(BinaryString|HexadecimalString)$/i, '-b, --bitstringformat', 'BinaryString or HexadecimalString'],
    ['compoundidmode', qr/^(DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i, '--CompoundIDMode', 'DataField, MolName, LabelPrefix or MolNameOrLabelPrefix'],
    ['datafieldsmode', qr/^(All|Common|Specify|CompoundID)$/i, '-d, --DataFieldsMode', 'All, Common, Specify or CompoundID'],
    ['filter', qr/^(Yes|No)$/i, '-f, --Filter', 'Yes or No'],
    ['keeplargestcomponent', qr/^(Yes|No)$/i, '-k, --KeepLargestComponent', 'Yes or No'],
    ['mode', qr/^(MACCSKeyBits|MACCSKeyCount)$/i, '-m, --mode', 'MACCSKeyBits or MACCSKeyCount'],
    ['output', qr/^(SD|FP|text|all)$/i, '--output', 'SD, FP, text, or all'],
    ['outdelim', qr/^(comma|semicolon|tab)$/i, '--outdelim', 'comma, tab, or semicolon'],
    ['quote', qr/^(Yes|No)$/i, '-q --quote', 'Yes or No'],
  );
  for my $EnumeratedOption (@EnumeratedOptions) {
    $CheckEnumeratedOption->(@{$EnumeratedOption});
  }

  # Fingerprints strings contain semicolons: such values must be quoted in semicolon delimited output...
  if ($Options{outdelim} =~ /semicolon/i && $Options{quote} =~ /^No$/i) {
    die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not allowed with, semicolon value of \"--outdelim\" option: Fingerprints string use semicolon as delimiter for various data fields and must be quoted.\n";
  }

  my $SizeIsValid = IsPositiveInteger($Options{size}) && ($Options{size} == 166 || $Options{size} == 322);
  if (!$SizeIsValid) {
    die "Error: The value specified, $Options{size}, for option \"-s, --size\" is not valid. Allowed values: 166 or 322 \n";
  }

  $CheckEnumeratedOption->('vectorstringformat', qr/^(ValuesString|IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString)$/i, '-v, --VectorStringFormat', 'ValuesString, IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString or ValuesAndIDsPairsString');
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
666
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
667 __END__
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
668
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
669 =head1 NAME
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
670
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
671 MACCSKeysFingerprints.pl - Generate MACCS key fingerprints for SD files
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
672
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
673 =head1 SYNOPSIS
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
674
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
675 MACCSKeysFingerprints.pl SDFile(s)...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
676
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
677 MACCSKeysFingerprints.pl [B<--AromaticityModel> I<AromaticityModelType>]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
678 [B<--BitsOrder> I<Ascending | Descending>]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
679 [B<-b, --BitStringFormat> I<BinaryString | HexadecimalString>]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
680 [B<--CompoundID> I<DataFieldName or LabelPrefixString>] [B<--CompoundIDLabel> I<text>]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
681 [B<--CompoundIDMode> I<DataField | MolName | LabelPrefix | MolNameOrLabelPrefix>]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
682 [B<--DataFields> I<"FieldLabel1,FieldLabel2,...">] [B<-d, --DataFieldsMode> I<All | Common | Specify | CompoundID>]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
683 [B<-f, --Filter> I<Yes | No>] [B<--FingerprintsLabel> I<text>] [B<-h, --help>] [B<-k, --KeepLargestComponent> I<Yes | No>]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
684 [B<-m, --mode> I<MACCSKeyBits | MACCSKeyCount>] [B<--OutDelim> I<comma | tab | semicolon>]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
685 [B<--output> I<SD | FP | text | all>] [B<-o, --overwrite>]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
686 [B<-q, --quote> I<Yes | No>] [B<-r, --root> I<RootName>] [B<-s, --size> I<number>]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
687 [B<-v, --VectorStringFormat> I<ValuesString | IDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString>]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
688 [B<-w, --WorkingDir> I<DirName>]
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
689
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
690 =head1 DESCRIPTION
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
691
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
692 Generate MACCS (Molecular ACCess System) keys fingerprints [ Ref 45-47 ] for I<SDFile(s)>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
693 and create appropriate SD, FP or CSV/TSV text file(s) containing fingerprints bit-vector or
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
694 vector strings corresponding to molecular fingerprints.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
695
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
696 Multiple SDFile names are separated by spaces. The valid file extensions are I<.sdf>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
697 and I<.sd>. All other file names are ignored. All the SD files in a current directory
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
698 can be specified either by I<*.sdf> or the current directory name.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
699
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
700 For each MACCS keys definition, atoms are processed to determine their membership to the key
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
701 and the appropriate molecular fingerprints strings are generated. An atom can belong to multiple
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
702 MACCS keys.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
703
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
704 For I<MACCSKeyBits> value of B<-m, --mode> option, a fingerprint bit-vector string containing
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
705 zeros and ones is generated and for I<MACCSKeyCount> value, a fingerprint vector string
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
706 corresponding to number of MACCS keys [ Ref 45-47 ] is generated.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
707
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
708 I<MACCSKeyBits | MACCSKeyCount> values for B<-m, --mode> option along with two possible
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
709 I<166 | 322> values of B<-s, --size> support generation of four different types of MACCS
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
710 keys fingerprint: I<MACCS166KeyBits, MACCS166KeyCount, MACCS322KeyBits, MACCS322KeyCount>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
711
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
712 Example of I<SD> file containing MACCS keys fingerprints string data:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
713
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
714 ... ...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
715 ... ...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
716 $$$$
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
717 ... ...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
718 ... ...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
719 ... ...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
720 41 44 0 0 0 0 0 0 0 0999 V2000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
721 -3.3652 1.4499 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
722 ... ...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
723 2 3 1 0 0 0 0
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
724 ... ...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
725 M END
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
726 > <CmpdID>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
727 Cmpd1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
728
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
729 > <MACCSKeysFingerprints>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
730 FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;000000000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
731 00000000000000000000000000000000100100001001000000001001000000001110001
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
732 00101010111100011011000100110110000011011110100110111111111111011111111
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
733 11111111110111000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
734
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
735 $$$$
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
736 ... ...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
737 ... ...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
738
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
739 Example of I<FP> file containing MACCS keys fingerprints string data:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
740
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
741 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
742 # Package = MayaChemTools 7.4
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
743 # Release Date = Oct 21, 2010
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
744 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
745 # TimeStamp = Fri Mar 11 14:57:24 2011
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
746 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
747 # FingerprintsStringType = FingerprintsBitVector
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
748 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
749 # Description = MACCSKeyBits
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
750 # Size = 166
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
751 # BitStringFormat = BinaryString
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
752 # BitsOrder = Ascending
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
753 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
754 Cmpd1 00000000000000000000000000000000000000000100100001001000000001...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
755 Cmpd2 00000000000000000000000010000000001000000010000000001000000000...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
756 ... ...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
757 ... ..
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
758
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
759 Example of CSV I<Text> file containing MACCS keys fingerprints string data:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
760
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
761 "CompoundID","MACCSKeysFingerprints"
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
762 "Cmpd1","FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
763 00000000000000000000000000000000000000000100100001001000000001001000000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
764 00111000100101010111100011011000100110110000011011110100110111111111111
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
765 01111111111111111110111000"
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
766 ... ...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
767 ... ...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
768
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
769 The current release of MayaChemTools generates the following types of MACCS keys
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
770 fingerprints bit-vector and vector strings:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
771
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
772 FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
773 0000000000000000000000000000000001001000010010000000010010000000011100
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
774 0100101010111100011011000100110110000011011110100110111111111111011111
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
775 11111111111110111000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
776
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
777 FingerprintsBitVector;MACCSKeyBits;166;HexadecimalString;Ascending;000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
778 000000021210210e845f8d8c60b79dffbffffd1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
779
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
780 FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
781 1110011111100101111111000111101100110000000000000011100010000000000000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
782 0000000000000000000000000000000000000000000000101000000000000000000000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
783 0000000000000000000000000000000000000000000000000000000000000000000000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
784 0000000000000000000000000000000000000011000000000000000000000000000000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
785 0000000000000000000000000000000000000000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
786
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
787 FingerprintsBitVector;MACCSKeyBits;322;HexadecimalString;Ascending;7d7
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
788 e7af3edc000c1100000000000000500000000000000000000000000000000300000000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
789 000000000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
790
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
791 FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
792 ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
793 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
794 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
795 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
796 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
797
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
798 FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
799 ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
800 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
801 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
802 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
803 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
804
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
805 =head1 OPTIONS
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
806
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
807 =over 4
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
808
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
809 =item B<--AromaticityModel> I<MDLAromaticityModel | TriposAromaticityModel | MMFFAromaticityModel | ChemAxonBasicAromaticityModel | ChemAxonGeneralAromaticityModel | DaylightAromaticityModel | MayaChemToolsAromaticityModel>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
810
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
811 Specify aromaticity model to use during detection of aromaticity. Possible values in the current
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
812 release are: I<MDLAromaticityModel, TriposAromaticityModel, MMFFAromaticityModel,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
813 ChemAxonBasicAromaticityModel, ChemAxonGeneralAromaticityModel, DaylightAromaticityModel
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
814 or MayaChemToolsAromaticityModel>. Default value: I<MayaChemToolsAromaticityModel>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
815
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
816 The supported aromaticity model names along with model specific control parameters
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
817 are defined in B<AromaticityModelsData.csv>, which is distributed with the current release
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
818 and is available under B<lib/data> directory. B<Molecule.pm> module retrieves data from
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
819 this file during class instantiation and makes it available to method B<DetectAromaticity>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
820 for detecting aromaticity corresponding to a specific model.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
821
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
822 =item B<--BitsOrder> I<Ascending | Descending>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
823
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
824 Bits order to use during generation of fingerprints bit-vector string for I<MACCSKeyBits> value of
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
825 B<-m, --mode> option. Possible values: I<Ascending, Descending>. Default: I<Ascending>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
826
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
827 I<Ascending> bit order corresponds to the first bit in each byte being the lowest bit, as
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
828 opposed to the highest bit.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
829
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
830 Internally, bits are stored in I<Ascending> order using Perl vec function. Regardless
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
831 of machine order, big-endian or little-endian, vec function always considers first
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
832 string byte as the lowest byte and first bit within each byte as the lowest bit.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
833
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
834 =item B<-b, --BitStringFormat> I<BinaryString | HexadecimalString>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
835
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
836 Format of fingerprints bit-vector string data in output SD, FP or CSV/TSV text file(s) specified by
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
837 B<--output> used during I<MACCSKeyBits> value of B<-m, --mode> option. Possible
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
838 values: I<BinaryString, HexadecimalString>. Default value: I<BinaryString>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
839
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
840 I<BinaryString> corresponds to an ASCII string containing 1s and 0s. I<HexadecimalString>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
841 contains bit values in ASCII hexadecimal format.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
842
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
843 Examples:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
844
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
845 FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
846 0000000000000000000000000000000001001000010010000000010010000000011100
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
847 0100101010111100011011000100110110000011011110100110111111111111011111
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
848 11111111111110111000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
849
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
850 FingerprintsBitVector;MACCSKeyBits;166;HexadecimalString;Ascending;000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
851 000000021210210e845f8d8c60b79dffbffffd1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
852
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
853 FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
854 1110011111100101111111000111101100110000000000000011100010000000000000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
855 0000000000000000000000000000000000000000000000101000000000000000000000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
856 0000000000000000000000000000000000000000000000000000000000000000000000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
857 0000000000000000000000000000000000000011000000000000000000000000000000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
858 0000000000000000000000000000000000000000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
859
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
860 FingerprintsBitVector;MACCSKeyBits;322;HexadecimalString;Ascending;7d7
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
861 e7af3edc000c1100000000000000500000000000000000000000000000000300000000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
862 000000000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
863
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
864 =item B<--CompoundID> I<DataFieldName or LabelPrefixString>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
865
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
866 This value is B<--CompoundIDMode> specific and indicates how compound ID is generated.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
867
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
868 For I<DataField> value of B<--CompoundIDMode> option, it corresponds to datafield label name
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
869 whose value is used as compound ID; otherwise, it's a prefix string used for generating compound
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
870 IDs like LabelPrefixString<Number>. Default value, I<Cmpd>, generates compound IDs which
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
871 look like Cmpd<Number>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
872
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
873 Examples for I<DataField> value of B<--CompoundIDMode>:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
874
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
875 MolID
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
876 ExtReg
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
877
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
878 Examples for I<LabelPrefix> or I<MolNameOrLabelPrefix> value of B<--CompoundIDMode>:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
879
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
880 Compound
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
881
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
882 The value specified above generates compound IDs which correspond to Compound<Number>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
883 instead of default value of Cmpd<Number>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
884
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
885 =item B<--CompoundIDLabel> I<text>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
886
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
887 Specify compound ID column label for FP or CSV/TSV text file(s) used during I<CompoundID> value
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
888 of B<--DataFieldsMode> option. Default: I<CompoundID>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
889
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
890 =item B<--CompoundIDMode> I<DataField | MolName | LabelPrefix | MolNameOrLabelPrefix>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
891
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
892 Specify how to generate compound IDs and write to FP or CSV/TSV text file(s) along with generated
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
893 fingerprints for I<FP | text | all> values of B<--output> option: use a I<SDFile(s)> datafield value;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
894 use molname line from I<SDFile(s)>; generate a sequential ID with specific prefix; use combination
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
895 of both MolName and LabelPrefix with usage of LabelPrefix values for empty molname lines.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
896
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
897 Possible values: I<DataField | MolName | LabelPrefix | MolNameOrLabelPrefix>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
898 Default: I<LabelPrefix>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
899
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
900 For I<MolNameOrLabelPrefix> value of B<--CompoundIDMode>, molname line in I<SDFile(s)> takes
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
901 precedence over sequential compound IDs generated using I<LabelPrefix> and only empty molname
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
902 values are replaced with sequential compound IDs.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
903
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
904 This is only used for I<CompoundID> value of B<--DataFieldsMode> option.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
905
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
906 =item B<--DataFields> I<"FieldLabel1,FieldLabel2,...">
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
907
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
908 Comma delimited list of I<SDFile(s)> data fields to extract and write to CSV/TSV text file(s) along
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
909 with generated fingerprints for I<text | all> values of B<--output> option.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
910
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
911 This is only used for I<Specify> value of B<--DataFieldsMode> option.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
912
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
913 Examples:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
914
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
915 Extreg
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
916 MolID,CompoundName
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
917
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
918 =item B<-d, --DataFieldsMode> I<All | Common | Specify | CompoundID>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
919
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
920 Specify how data fields in I<SDFile(s)> are transferred to output CSV/TSV text file(s) along
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
921 with generated fingerprints for I<text | all> values of B<--output> option: transfer all SD
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
922 data fields; transfer SD data fields common to all compounds; extract specified data fields;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
923 generate a compound ID using molname line, a compound prefix, or a combination of both.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
924 Possible values: I<All | Common | Specify | CompoundID>. Default value: I<CompoundID>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
925
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
926 =item B<-f, --Filter> I<Yes | No>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
927
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
928 Specify whether to check and filter compound data in SDFile(s). Possible values: I<Yes or No>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
929 Default value: I<Yes>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
930
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
931 By default, compound data is checked before calculating fingerprints and compounds containing
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
932 atom data corresponding to non-element symbols or no atom data are ignored.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
933
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
934 =item B<--FingerprintsLabel> I<text>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
935
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
936 SD data label or text file column label to use for fingerprints string in output SD or
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
937 CSV/TSV text file(s) specified by B<--output>. Default value: I<MACCSKeyFingerprints>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
938
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
939 =item B<-h, --help>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
940
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
941 Print this help message.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
942
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
943 =item B<-k, --KeepLargestComponent> I<Yes | No>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
944
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
945 Generate fingerprints for only the largest component in molecule. Possible values:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
946 I<Yes or No>. Default value: I<Yes>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
947
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
948 For molecules containing multiple connected components, fingerprints can be generated
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
949 in two different ways: use all connected components or just the largest connected
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
950 component. By default, all atoms except for the largest connected component are
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
951 deleted before generation of fingerprints.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
952
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
953 =item B<-m, --mode> I<MACCSKeyBits | MACCSKeyCount>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
954
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
955 Specify type of MACCS keys [ Ref 45-47 ] fingerprints to generate for molecules in I<SDFile(s)>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
956 Possible values: I<MACCSKeyBits, MACCSKeyCount>. Default value: I<MACCSKeyBits>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
957
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
958 For I<MACCSKeyBits> value of B<-m, --mode> option, a fingerprint bit-vector string containing
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
959 zeros and ones is generated and for I<MACCSKeyCount> value, a fingerprint vector string
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
960 corresponding to number of MACCS keys is generated.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
961
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
962 I<MACCSKeyBits | MACCSKeyCount> values for B<-m, --mode> option along with two possible
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
963 I<166 | 322> values of B<-s, --size> support generation of four different types of MACCS
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
964 keys fingerprint: I<MACCS166KeyBits, MACCS166KeyCount, MACCS322KeyBits, MACCS322KeyCount>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
965
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
966 Definition of MACCS keys uses the following atom and bond symbols to define atom and
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
967 bond environments:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
968
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
969 Atom symbols for 166 keys [ Ref 47 ]:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
970
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
971 A : Any valid periodic table element symbol
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
972 Q : Hetero atoms; any non-C or non-H atom
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
973 X : Halogens; F, Cl, Br, I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
974 Z : Others; other than H, C, N, O, Si, P, S, F, Cl, Br, I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
975
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
976 Atom symbols for 322 keys [ Ref 46 ]:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
977
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
978 A : Any valid periodic table element symbol
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
979 Q : Hetero atoms; any non-C or non-H atom
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
980 X : Others; other than H, C, N, O, Si, P, S, F, Cl, Br, I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
981 Z is neither defined nor used
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
982
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
983 Bond types:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
984
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
985 - : Single
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
986 = : Double
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
987 T : Triple
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
988 # : Triple
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
989 ~ : Single or double query bond
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
990 % : An aromatic query bond
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
991
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
992 None : Any bond type; no explicit bond specified
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
993
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
994 $ : Ring bond; $ before a bond type specifies ring bond
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
995 ! : Chain or non-ring bond; ! before a bond type specifies chain bond
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
996
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
997 @ : A ring linkage and the number following it specifies the
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
998 atom's position in the line, thus @1 means linked back to the first
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
999 atom in the list.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1000
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1001 Aromatic: Kekule or Arom5
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1002
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1003 Kekule: Bonds in 6-membered rings with alternate single/double bonds
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1004 or perimeter bonds
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1005 Arom5: Bonds in 5-membered rings with two double bonds and a hetero
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1006 atom at the apex of the ring.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1007
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1008 MACCS 166 keys [ Ref 45-47 ] are defined as follows:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1009
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1010 Key Description
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1011
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1012 1 ISOTOPE
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1013 2 103 < ATOMIC NO. < 256
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1014 3 GROUP IVA,VA,VIA PERIODS 4-6 (Ge...)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1015 4 ACTINIDE
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1016 5 GROUP IIIB,IVB (Sc...)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1017 6 LANTHANIDE
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1018 7 GROUP VB,VIB,VIIB (V...)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1019 8 QAAA@1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1020 9 GROUP VIII (Fe...)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1021 10 GROUP IIA (ALKALINE EARTH)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1022 11 4M RING
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1023 12 GROUP IB,IIB (Cu...)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1024 13 ON(C)C
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1025 14 S-S
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1026 15 OC(O)O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1027 16 QAA@1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1028 17 CTC
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1029 18 GROUP IIIA (B...)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1030 19 7M RING
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1031 20 SI
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1032 21 C=C(Q)Q
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1033 22 3M RING
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1034 23 NC(O)O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1035 24 N-O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1036 25 NC(N)N
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1037 26 C$=C($A)$A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1038 27 I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1039 28 QCH2Q
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1040 29 P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1041 30 CQ(C)(C)A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1042 31 QX
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1043 32 CSN
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1044 33 NS
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1045 34 CH2=A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1046 35 GROUP IA (ALKALI METAL)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1047 36 S HETEROCYCLE
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1048 37 NC(O)N
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1049 38 NC(C)N
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1050 39 OS(O)O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1051 40 S-O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1052 41 CTN
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1053 42 F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1054 43 QHAQH
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1055 44 OTHER
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1056 45 C=CN
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1057 46 BR
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1058 47 SAN
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1059 48 OQ(O)O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1060 49 CHARGE
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1061 50 C=C(C)C
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1062 51 CSO
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1063 52 NN
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1064 53 QHAAAQH
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1065 54 QHAAQH
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1066 55 OSO
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1067 56 ON(O)C
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1068 57 O HETEROCYCLE
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1069 58 QSQ
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1070 59 Snot%A%A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1071 60 S=O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1072 61 AS(A)A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1073 62 A$A!A$A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1074 63 N=O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1075 64 A$A!S
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1076 65 C%N
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1077 66 CC(C)(C)A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1078 67 QS
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1079 68 QHQH (&...)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1080 69 QQH
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1081 70 QNQ
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1082 71 NO
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1083 72 OAAO
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1084 73 S=A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1085 74 CH3ACH3
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1086 75 A!N$A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1087 76 C=C(A)A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1088 77 NAN
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1089 78 C=N
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1090 79 NAAN
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1091 80 NAAAN
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1092 81 SA(A)A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1093 82 ACH2QH
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1094 83 QAAAA@1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1095 84 NH2
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1096 85 CN(C)C
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1097 86 CH2QCH2
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1098 87 X!A$A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1099 88 S
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1100 89 OAAAO
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1101 90 QHAACH2A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1102 91 QHAAACH2A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1103 92 OC(N)C
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1104 93 QCH3
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1105 94 QN
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1106 95 NAAO
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1107 96 5M RING
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1108 97 NAAAO
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1109 98 QAAAAA@1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1110 99 C=C
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1111 100 ACH2N
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1112 101 8M RING
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1113 102 QO
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1114 103 CL
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1115 104 QHACH2A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1116 105 A$A($A)$A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1117 106 QA(Q)Q
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1118 107 XA(A)A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1119 108 CH3AAACH2A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1120 109 ACH2O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1121 110 NCO
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1122 111 NACH2A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1123 112 AA(A)(A)A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1124 113 Onot%A%A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1125 114 CH3CH2A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1126 115 CH3ACH2A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1127 116 CH3AACH2A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1128 117 NAO
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1129 118 ACH2CH2A > 1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1130 119 N=A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1131 120 HETEROCYCLIC ATOM > 1 (&...)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1132 121 N HETEROCYCLE
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1133 122 AN(A)A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1134 123 OCO
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1135 124 QQ
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1136 125 AROMATIC RING > 1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1137 126 A!O!A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1138 127 A$A!O > 1 (&...)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1139 128 ACH2AAACH2A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1140 129 ACH2AACH2A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1141 130 QQ > 1 (&...)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1142 131 QH > 1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1143 132 OACH2A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1144 133 A$A!N
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1145 134 X (HALOGEN)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1146 135 Nnot%A%A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1147 136 O=A > 1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1148 137 HETEROCYCLE
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1149 138 QCH2A > 1 (&...)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1150 139 OH
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1151 140 O > 3 (&...)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1152 141 CH3 > 2 (&...)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1153 142 N > 1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1154 143 A$A!O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1155 144 Anot%A%Anot%A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1156 145 6M RING > 1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1157 146 O > 2
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1158 147 ACH2CH2A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1159 148 AQ(A)A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1160 149 CH3 > 1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1161 150 A!A$A!A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1162 151 NH
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1163 152 OC(C)C
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1164 153 QCH2A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1165 154 C=O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1166 155 A!CH2!A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1167 156 NA(A)A
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1168 157 C-O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1169 158 C-N
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1170 159 O > 1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1171 160 CH3
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1172 161 N
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1173 162 AROMATIC
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1174 163 6M RING
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1175 164 O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1176 165 RING
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1177 166 FRAGMENTS
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1178
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1179 MACCS 322 keys set as defined in tables 1, 2 and 3 [ Ref 46 ] include:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1180
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1181 . 26 atom properties of type P, as listed in Table 1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1182 . 32 one-atom environments, as listed in Table 3
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1183 . 264 atom-bond-atom combinations listed in Table 4
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1184
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1185 Total number of keys in three tables is: 322
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1186
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1187 Atom symbol, X, used for 322 keys [ Ref 46 ] doesn't refer to Halogens as it does for 166 keys. In
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1188 order to keep the definition of 322 keys consistent with the published definitions, the symbol X is
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1189 used to imply "others" atoms, but it's internally mapped to symbol Z as defined for 166 keys
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1190 during the generation of key values.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1191
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1192 Atom properties-based keys (26):
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1193
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1194 Key Description
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1195 1 A(AAA) or AA(A)A - atom with at least three neighbors
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1196 2 Q - heteroatom
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1197 3 Anot%not-A - atom involved in one or more multiple bonds, not aromatic
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1198 4 A(AAAA) or AA(A)(A)A - atom with at least four neighbors
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1199 5 A(QQ) or QA(Q) - atom with at least two heteroatom neighbors
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1200 6 A(QQQ) or QA(Q)Q - atom with at least three heteroatom neighbors
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1201 7 QH - heteroatom with at least one hydrogen attached
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1202 8 CH2(AA) or ACH2A - carbon with at least two single bonds and at least
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1203 two hydrogens attached
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1204 9 CH3(A) or ACH3 - carbon with at least one single bond and at least three
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1205 hydrogens attached
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1206 10 Halogen
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1207 11 A(-A-A-A) or A-A(-A)-A - atom has at least three single bonds
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1208 12 AAAAAA@1 > 2 - atom is in at least two different six-membered rings
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1209 13 A($A$A$A) or A$A($A)$A - atom has more than two ring bonds
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1210 14 A$A!A$A - atom is at a ring/chain boundary. When a comparison is done
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1211 with another atom the path passes through the chain bond.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1212 15 Anot%A%Anot%A - atom is at an aromatic/nonaromatic boundary. When a
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1213 comparison is done with another atom the path
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1214 passes through the aromatic bond.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1215 16 A!A!A - atom with more than one chain bond
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1216 17 A!A$A!A - atom is at a ring/chain boundary. When a comparison is done
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1217 with another atom the path passes through the ring bond.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1218 18 A%Anot%A%A - atom is at an aromatic/nonaromatic boundary. When a
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1219 comparison is done with another atom the
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1220 path passes through the nonaromatic bond.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1221 19 HETEROCYCLE - atom is a heteroatom in a ring.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1222 20 rare properties: atom with five or more neighbors, atom in
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1223 four or more rings, or atom types other than
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1224 H, C, N, O, S, F, Cl, Br, or I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1225 21 rare properties: atom has a charge, is an isotope, has two or
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1226 more multiple bonds, or has a triple bond.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1227 22 N - nitrogen
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1228 23 S - sulfur
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1229 24 O - oxygen
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1230 25 A(AA)A(A)A(AA) - atom has two neighbors, each with three or
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1231 more neighbors (including the central atom).
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1232 26 CH2ACH2 - atom has two hydrocarbon (CH2) neighbors
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1233
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1234 Atomic environments properties-based keys (32):
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1235
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1236 Key Description
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1237 27 C(CC)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1238 28 C(CCC)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1239 29 C(CN)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1240 30 C(CCN)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1241 31 C(NN)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1242 32 C(NNC)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1243 33 C(NNN)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1244 34 C(CO)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1245 35 C(CCO)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1246 36 C(NO)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1247 37 C(NCO)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1248 38 C(NNO)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1249 39 C(OO)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1250 40 C(COO)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1251 41 C(NOO)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1252 42 C(OOO)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1253 43 Q(CC)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1254 44 Q(CCC)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1255 45 Q(CN)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1256 46 Q(CCN)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1257 47 Q(NN)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1258 48 Q(CNN)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1259 49 Q(NNN)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1260 50 Q(CO)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1261 51 Q(CCO)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1262 52 Q(NO)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1263 53 Q(CNO)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1264 54 Q(NNO)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1265 55 Q(OO)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1266 56 Q(COO)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1267 57 Q(NOO)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1268 58 Q(OOO)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1269
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1270 Note: The first symbol is the central atom, with atoms bonded to the central atom listed in
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1271 parentheses. Q is any non-C, non-H atom. If only two atoms are in parentheses, there is
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1272 no implication concerning the other atoms bonded to the central atom.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1273
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1274 Atom-Bond-Atom properties-based keys (264):
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1275
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1276 Key Description
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1277 59 C-C
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1278 60 C-N
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1279 61 C-O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1280 62 C-S
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1281 63 C-Cl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1282 64 C-P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1283 65 C-F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1284 66 C-Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1285 67 C-Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1286 68 C-I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1287 69 C-X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1288 70 N-N
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1289 71 N-O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1290 72 N-S
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1291 73 N-Cl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1292 74 N-P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1293 75 N-F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1294 76 N-Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1295 77 N-Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1296 78 N-I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1297 79 N-X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1298 80 O-O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1299 81 O-S
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1300 82 O-Cl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1301 83 O-P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1302 84 O-F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1303 85 O-Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1304 86 O-Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1305 87 O-I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1306 88 O-X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1307 89 S-S
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1308 90 S-Cl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1309 91 S-P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1310 92 S-F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1311 93 S-Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1312 94 S-Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1313 95 S-I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1314 96 S-X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1315 97 Cl-Cl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1316 98 Cl-P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1317 99 Cl-F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1318 100 Cl-Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1319 101 Cl-Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1320 102 Cl-I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1321 103 Cl-X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1322 104 P-P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1323 105 P-F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1324 106 P-Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1325 107 P-Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1326 108 P-I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1327 109 P-X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1328 110 F-F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1329 111 F-Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1330 112 F-Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1331 113 F-I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1332 114 F-X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1333 115 Br-Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1334 116 Br-Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1335 117 Br-I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1336 118 Br-X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1337 119 Si-Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1338 120 Si-I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1339 121 Si-X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1340 122 I-I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1341 123 I-X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1342 124 X-X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1343 125 C=C
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1344 126 C=N
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1345 127 C=O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1346 128 C=S
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1347 129 C=Cl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1348 130 C=P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1349 131 C=F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1350 132 C=Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1351 133 C=Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1352 134 C=I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1353 135 C=X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1354 136 N=N
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1355 137 N=O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1356 138 N=S
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1357 139 N=Cl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1358 140 N=P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1359 141 N=F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1360 142 N=Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1361 143 N=Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1362 144 N=I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1363 145 N=X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1364 146 O=O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1365 147 O=S
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1366 148 O=Cl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1367 149 O=P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1368 150 O=F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1369 151 O=Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1370 152 O=Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1371 153 O=I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1372 154 O=X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1373 155 S=S
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1374 156 S=Cl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1375 157 S=P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1376 158 S=F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1377 159 S=Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1378 160 S=Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1379 161 S=I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1380 162 S=X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1381 163 Cl=Cl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1382 164 Cl=P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1383 165 Cl=F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1384 166 Cl=Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1385 167 Cl=Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1386 168 Cl=I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1387 169 Cl=X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1388 170 P=P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1389 171 P=F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1390 172 P=Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1391 173 P=Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1392 174 P=I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1393 175 P=X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1394 176 F=F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1395 177 F=Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1396 178 F=Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1397 179 F=I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1398 180 F=X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1399 181 Br=Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1400 182 Br=Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1401 183 Br=I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1402 184 Br=X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1403 185 Si=Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1404 186 Si=I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1405 187 Si=X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1406 188 I=I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1407 189 I=X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1408 190 X=X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1409 191 C#C
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1410 192 C#N
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1411 193 C#O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1412 194 C#S
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1413 195 C#Cl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1414 196 C#P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1415 197 C#F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1416 198 C#Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1417 199 C#Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1418 200 C#I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1419 201 C#X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1420 202 N#N
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1421 203 N#O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1422 204 N#S
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1423 205 N#Cl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1424 206 N#P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1425 207 N#F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1426 208 N#Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1427 209 N#Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1428 210 N#I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1429 211 N#X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1430 212 O#O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1431 213 O#S
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1432 214 O#Cl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1433 215 O#P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1434 216 O#F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1435 217 O#Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1436 218 O#Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1437 219 O#I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1438 220 O#X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1439 221 S#S
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1440 222 S#Cl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1441 223 S#P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1442 224 S#F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1443 225 S#Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1444 226 S#Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1445 227 S#I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1446 228 S#X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1447 229 Cl#Cl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1448 230 Cl#P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1449 231 Cl#F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1450 232 Cl#Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1451 233 Cl#Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1452 234 Cl#I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1453 235 Cl#X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1454 236 P#P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1455 237 P#F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1456 238 P#Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1457 239 P#Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1458 240 P#I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1459 241 P#X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1460 242 F#F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1461 243 F#Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1462 244 F#Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1463 245 F#I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1464 246 F#X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1465 247 Br#Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1466 248 Br#Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1467 249 Br#I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1468 250 Br#X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1469 251 Si#Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1470 252 Si#I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1471 253 Si#X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1472 254 I#I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1473 255 I#X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1474 256 X#X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1475 257 C$C
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1476 258 C$N
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1477 259 C$O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1478 260 C$S
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1479 261 C$Cl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1480 262 C$P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1481 263 C$F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1482 264 C$Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1483 265 C$Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1484 266 C$I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1485 267 C$X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1486 268 N$N
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1487 269 N$O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1488 270 N$S
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1489 271 N$Cl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1490 272 N$P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1491 273 N$F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1492 274 N$Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1493 275 N$Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1494 276 N$I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1495 277 N$X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1496 278 O$O
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1497 279 O$S
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1498 280 O$Cl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1499 281 O$P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1500 282 O$F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1501 283 O$Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1502 284 O$Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1503 285 O$I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1504 286 O$X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1505 287 S$S
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1506 288 S$Cl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1507 289 S$P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1508 290 S$F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1509 291 S$Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1510 292 S$Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1511 293 S$I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1512 294 S$X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1513 295 Cl$Cl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1514 296 Cl$P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1515 297 Cl$F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1516 298 Cl$Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1517 299 Cl$Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1518 300 Cl$I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1519 301 Cl$X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1520 302 P$P
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1521 303 P$F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1522 304 P$Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1523 305 P$Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1524 306 P$I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1525 307 P$X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1526 308 F$F
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1527 309 F$Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1528 310 F$Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1529 311 F$I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1530 312 F$X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1531 313 Br$Br
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1532 314 Br$Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1533 315 Br$I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1534 316 Br$X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1535 317 Si$Si
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1536 318 Si$I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1537 319 Si$X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1538 320 I$I
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1539 321 I$X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1540 322 X$X
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1541
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1542 =item B<--OutDelim> I<comma | tab | semicolon>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1543
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1544 Delimiter for output CSV/TSV text file(s). Possible values: I<comma, tab, or semicolon>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1545 Default value: I<comma>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1546
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1547 =item B<--output> I<SD | FP | text | all>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1548
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1549 Type of output files to generate. Possible values: I<SD, FP, text, or all>. Default value: I<text>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1550
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1551 =item B<-o, --overwrite>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1552
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1553 Overwrite existing files.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1554
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1555 =item B<-q, --quote> I<Yes | No>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1556
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1557 Put quotes around column values in output CSV/TSV text file(s). Possible values:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1558 I<Yes or No>. Default value: I<Yes>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1559
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1560 =item B<-r, --root> I<RootName>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1561
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1562 New file name is generated using the root: <Root>.<Ext>. Default for new file
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1563 names: <SDFileName><MACCSKeysFP>.<Ext>. The file type determines <Ext> value.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1564 The sdf, fpf, csv, and tsv <Ext> values are used for SD, FP, comma/semicolon, and tab
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1565 delimited text files, respectively. This option is ignored for multiple input files.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1566
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1567 =item B<-s, --size> I<number>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1568
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1569 Size of MACCS keys [ Ref 45-47 ] set to use during fingerprints generation. Possible values: I<166 or 322>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1570 Default value: I<166>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1571
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1572 =item B<-v, --VectorStringFormat> I<ValuesString | IDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1573
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1574 Format of fingerprints vector string data in output SD, FP or CSV/TSV text file(s) specified by
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1575 B<--output> used during I<MACCSKeyCount> value of B<-m, --mode> option. Possible
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1576 values: I<ValuesString, IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, or
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1577 ValuesAndIDsPairsString>. Default value: I<ValuesString>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1578
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1579 Examples:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1580
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1581 FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1582 ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1583 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1584 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1585 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1586 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1587
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1588 FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1589 ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1590 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1591 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1592 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1593 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1594
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1595 =item B<-w, --WorkingDir> I<DirName>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1596
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1597 Location of working directory. Default: current directory.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1598
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1599 =back
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1600
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1601 =head1 EXAMPLES
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1602
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1603 To generate MACCS keys fingerprints of size 166 in binary bit-vector string format
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1604 and create a SampleMACCS166FPBin.csv file containing sequential compound IDs along with
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1605 fingerprints bit-vector strings data, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1606
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1607 % MACCSKeysFingerprints.pl -r SampleMACCS166FPBin -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1608
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1609 To generate MACCS keys fingerprints of size 166 in binary bit-vector string format
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1610 and create SampleMACCS166FPBin.sdf, SampleMACCS166FPBin.fpf and SampleMACCS166FPBin.csv
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1611 files containing sequential compound IDs in CSV file along with fingerprints bit-vector strings data, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1612
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1613 % MACCSKeysFingerprints.pl --output all -r SampleMACCS166FPBin
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1614 -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1615
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1616 To generate MACCS keys fingerprints of size 322 in binary bit-vector string format
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1617 and create a SampleMACCS322FPBin.csv file containing sequential compound IDs along with
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1618 fingerprints bit-vector strings data, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1619
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1620 % MACCSKeysFingerprints.pl --size 322 -r SampleMACCS322FPBin -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1621
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1622 To generate MACCS keys fingerprints of size 166 corresponding to count of keys in
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1623 ValuesString format and create a SampleMACCS166FPCount.csv file containing sequential
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1624 compound IDs along with fingerprints vector strings data, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1625
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1626 % MACCSKeysFingerprints.pl -m MACCSKeyCount -r SampleMACCS166FPCount
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1627 -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1628
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1629 To generate MACCS keys fingerprints of size 322 corresponding to count of keys in
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1630 ValuesString format and create a SampleMACCS322FPCount.csv file containing sequential
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1631 compound IDs along with fingerprints vector strings data, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1632
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1633 % MACCSKeysFingerprints.pl -m MACCSKeyCount --size 322
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1634 -r SampleMACCS322FPCount -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1635
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1636 To generate MACCS keys fingerprints of size 166 in hexadecimal bit-vector string format with
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1637 ascending bits order and create a SampleMACCS166FPHex.csv file containing compound IDs
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1638 from MolName along with fingerprints bit-vector strings data, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1639
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1640 % MACCSKeysFingerprints.pl -m MACCSKeyBits --size 166 --BitStringFormat
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1641 HexadecimalString --BitsOrder Ascending --DataFieldsMode CompoundID
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1642 --CompoundIDMode MolName -r SampleMACCS166FPHex -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1643
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1644 To generate MACCS keys fingerprints of size 166 corresponding to count of keys in
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1645 IDsAndValuesString format and create a SampleMACCS166FPCount.csv file containing
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1646 compound IDs from MolName line along with fingerprints vector strings data, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1647
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1648 % MACCSKeysFingerprints.pl -m MACCSKeyCount --size 166
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1649 --VectorStringFormat IDsAndValuesString --DataFieldsMode CompoundID
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1650 --CompoundIDMode MolName -r SampleMACCS166FPCount -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1651
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1652 To generate MACCS keys fingerprints of size 166 corresponding to count of keys in
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1653 IDsAndValuesString format and create a SampleMACCS166FPCount.csv file containing
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1654 compound IDs using specified data field along with fingerprints vector strings data, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1655
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1656 % MACCSKeysFingerprints.pl -m MACCSKeyCount --size 166
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1657 --VectorStringFormat IDsAndValuesString --DataFieldsMode CompoundID
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1658 --CompoundIDMode DataField --CompoundID Mol_ID -r
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1659 SampleMACCS166FPCount -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1660
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1661 To generate MACCS keys fingerprints of size 322 corresponding to count of keys in
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1662 ValuesString format and create a SampleMACCS322FPCount.tsv file containing compound
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1663 IDs derived from combination of molecule name line and an explicit compound prefix
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1664 along with fingerprints vector strings data in a column labeled MACCSKeyCountFP, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1665
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1666 % MACCSKeysFingerprints.pl -m MACCSKeyCount --size 322 --DataFieldsMode
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1667 CompoundID --CompoundIDMode MolnameOrLabelPrefix --CompoundID Cmpd
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1668 --CompoundIDLabel MolID --FingerprintsLabel MACCSKeyCountFP --OutDelim
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1669 Tab -r SampleMACCS322FPCount -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1670
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1671 To generate MACCS keys fingerprints of size 166 corresponding to count of keys in
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1672 ValuesString format and create a SampleMACCS166FPCount.csv file containing
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1673 specific data fields columns along with fingerprints vector strings data, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1674
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1675 % MACCSKeysFingerprints.pl -m MACCSKeyCount --size 166
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1676 --VectorStringFormat ValuesString --DataFieldsMode Specify --DataFields
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1677 Mol_ID -r SampleMACCS166FPCount -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1678
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1679 To generate MACCS keys fingerprints of size 322 corresponding to count of keys in
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1680 ValuesString format and create a SampleMACCS322FPCount.csv file containing
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1681 common data fields columns along with fingerprints vector strings data, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1682
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1683 % MACCSKeysFingerprints.pl -m MACCSKeyCount --size 322
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1684 --VectorStringFormat ValuesString --DataFieldsMode Common -r
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1685 SampleMACCS322FPCount -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1686
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1687 To generate MACCS keys fingerprints of size 166 corresponding to count of keys in
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1688 ValuesString format and create SampleMACCS166FPCount.sdf, SampleMACCS166FPCount.fpf and
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1689 SampleMACCS166FPCount.csv files containing all data fields columns in CSV file
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1690 along with fingerprints vector strings data, type:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1691
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1692 % MACCSKeysFingerprints.pl -m MACCSKeyCount --size 166 --output all
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1693 --VectorStringFormat ValuesString --DataFieldsMode All -r
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1694 SampleMACCS166FPCount -o Sample.sdf
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1695
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1696 =head1 AUTHOR
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1697
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1698 Manish Sud <msud@san.rr.com>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1699
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1700 =head1 SEE ALSO
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1701
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1702 InfoFingerprintsFiles.pl, SimilarityMatricesFingerprints.pl, AtomNeighborhoodsFingerprints.pl,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1703 ExtendedConnectivityFingerprints.pl, PathLengthFingerprints.pl,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1704 TopologicalAtomPairsFingerprints.pl, TopologicalAtomTorsionsFingerprints.pl,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1705 TopologicalPharmacophoreAtomPairsFingerprints.pl, TopologicalPharmacophoreAtomTripletsFingerprints.pl
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1706
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1707 =head1 COPYRIGHT
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1708
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1709 Copyright (C) 2015 Manish Sud. All rights reserved.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1710
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1711 This file is part of MayaChemTools.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1712
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1713 MayaChemTools is free software; you can redistribute it and/or modify it under
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1714 the terms of the GNU Lesser General Public License as published by the Free
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1715 Software Foundation; either version 3 of the License, or (at your option)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1716 any later version.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1717
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1718 =cut