0
|
1 #!/usr/bin/perl -w
|
|
2 #
|
|
3 # $RCSfile: TopologicalPharmacophoreAtomPairsFingerprints.pl,v $
|
|
4 # $Date: 2015/02/28 20:46:23 $
|
|
5 # $Revision: 1.36 $
|
|
6 #
|
|
7 # Author: Manish Sud <msud@san.rr.com>
|
|
8 #
|
|
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
10 #
|
|
11 # This file is part of MayaChemTools.
|
|
12 #
|
|
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
14 # the terms of the GNU Lesser General Public License as published by the Free
|
|
15 # Software Foundation; either version 3 of the License, or (at your option) any
|
|
16 # later version.
|
|
17 #
|
|
18 # MayaChemTools is distributed in the hope that it will be useful, but without
|
|
19 # any warranty; without even the implied warranty of merchantability of fitness
|
|
20 # for a particular purpose. See the GNU Lesser General Public License for more
|
|
21 # details.
|
|
22 #
|
|
23 # You should have received a copy of the GNU Lesser General Public License
|
|
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
|
|
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
|
|
26 # Boston, MA, 02111-1307, USA.
|
|
27 #
|
|
28
|
|
29 use strict;
|
|
30 use FindBin; use lib "$FindBin::Bin/../lib";
|
|
31 use Getopt::Long;
|
|
32 use File::Basename;
|
|
33 use Text::ParseWords;
|
|
34 use Benchmark;
|
|
35 use FileUtil;
|
|
36 use TextUtil;
|
|
37 use SDFileUtil;
|
|
38 use MoleculeFileIO;
|
|
39 use FileIO::FingerprintsSDFileIO;
|
|
40 use FileIO::FingerprintsTextFileIO;
|
|
41 use FileIO::FingerprintsFPFileIO;
|
|
42 use AtomTypes::FunctionalClassAtomTypes;
|
|
43 use Fingerprints::TopologicalPharmacophoreAtomPairsFingerprints;
|
|
44
|
|
my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Turn on autoflush for STDOUT...
$| = 1;

# Print starting message and record the start time...
$ScriptName = basename($0);
print "\n$ScriptName: Starting...\n\n";
$StartTime = Benchmark->new();

# Retrieve command line options; print usage and quit when help is requested
# or no input file names are specified...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

# Expand file names specified on the command line...
my(@SDFilesList);
@SDFilesList = ExpandFileNames(\@ARGV, "sdf sd");

# Process option values into %OptionsInfo...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Collect information about input files into %SDFilesInfo...
print "Checking input SD file(s)...\n";
my(%SDFilesInfo);
RetrieveSDFilesInfo();

# Generate fingerprints for each input file marked as okay...
my($FileIndex);
print "\nProcessing SD files...\n" if (@SDFilesList > 1);
for $FileIndex (0 .. $#SDFilesList) {
  next unless $SDFilesInfo{FileOkay}[$FileIndex];

  print "\nProcessing file $SDFilesList[$FileIndex]...\n";
  GenerateTopologicalPharmacophoreAtomPairsFingerprints($FileIndex);
}
print "\n$ScriptName:Done...\n\n";

# Report total elapsed time...
$EndTime = Benchmark->new();
$TotalTime = timediff($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";
|
|
90
|
|
91 ###############################################################################
|
|
92
|
|
# Generate fingerprints for all compounds in the SD file at the specified index
# in @SDFilesList and write them out to the requested output file(s)...
#
# Compounds rejected by the filter or for which fingerprints generation fails
# are counted as ignored; summary statistics are printed at the end.
#
sub GenerateTopologicalPharmacophoreAtomPairsFingerprints {
  my($FileIndex) = @_;
  my($SDFile, $MoleculeFileIO, $Molecule, $Fingerprints, $TotalCount, $SkippedCount, $OutputFilesPending, $SDFileIO, $TextFileIO, $FPFileIO);

  $SDFile = $SDFilesList[$FileIndex];

  # Output files are set up only after the first successful fingerprints generation...
  $OutputFilesPending = 1;
  ($SDFileIO, $TextFileIO, $FPFileIO) = (undef, undef, undef);

  $MoleculeFileIO = MoleculeFileIO->new('Name' => $SDFile);
  $MoleculeFileIO->Open();

  $TotalCount = 0;
  $SkippedCount = 0;

  COMPOUND: while ($Molecule = $MoleculeFileIO->ReadMolecule()) {
    $TotalCount++;

    # Filter compound data before calculating fingerprints...
    if ($OptionsInfo{Filter} && CheckAndFilterCompound($TotalCount, $Molecule)) {
      $SkippedCount++;
      next COMPOUND;
    }

    $Fingerprints = GenerateMoleculeFingerprints($Molecule);
    unless ($Fingerprints) {
      $SkippedCount++;
      ProcessIgnoredCompound('FingerprintsGenerationFailed', $TotalCount, $Molecule);
      next COMPOUND;
    }

    if ($OutputFilesPending) {
      $OutputFilesPending = 0;
      SetupFingerprintsLabelValueIDs($Fingerprints);
      ($SDFileIO, $TextFileIO, $FPFileIO) = SetupAndOpenOutputFiles($FileIndex);
    }

    WriteDataToOutputFiles($FileIndex, $TotalCount, $Molecule, $Fingerprints, $SDFileIO, $TextFileIO, $FPFileIO);
  }
  $MoleculeFileIO->Close();

  # Close whichever output files were opened: SD, text and FP, in that order...
  for my $FileIO ($SDFileIO, $TextFileIO, $FPFileIO) {
    $FileIO->Close() if $FileIO;
  }

  WriteFingerprintsGenerationSummaryStatistics($TotalCount, $SkippedCount);
}
|
|
150
|
|
# Warn about a compound being ignored due to problems in fingerprints generation...
#
# Mode is matched case insensitively against ContainsNonElementalData,
# ContainsNoElementalData and FingerprintsGenerationFailed; any other value
# produces the same warning as FingerprintsGenerationFailed.
#
sub ProcessIgnoredCompound {
  my($Mode, $CmpdCount, $Molecule) = @_;
  my($CmpdID, $DataFieldLabelAndValuesRef);

  $DataFieldLabelAndValuesRef = $Molecule->GetDataFieldLabelAndValues();
  $CmpdID = SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef);

  if ($Mode =~ /^ContainsNonElementalData$/i) {
    warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Compound contains atom data corresponding to non-elemental atom symbol(s)...\n\n";
  }
  elsif ($Mode =~ /^ContainsNoElementalData$/i) {
    warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Compound contains no atom data...\n\n";
  }
  elsif ($Mode =~ /^FingerprintsGenerationFailed$/i) {
    warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Fingerprints generation didn't succeed...\n\n";
  }
  else {
    # Unrecognized mode: fall back to the generic failure message...
    warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Fingerprints generation didn't succeed...\n\n";
  }
}
|
|
178
|
|
# Check compound atom data and decide whether the compound is to be filtered...
#
# Returns 1, after issuing a warning, for a compound containing any non-elemental
# data or no elemental data; otherwise, returns 0.
#
sub CheckAndFilterCompound {
  my($CmpdCount, $Molecule) = @_;
  my($NumOfElements, $NumOfNonElements, $IgnoreMode);

  ($NumOfElements, $NumOfNonElements) = $Molecule->GetNumOfElementsAndNonElements();

  # Presence of non-elemental data takes precedence over absence of elemental data...
  $IgnoreMode = '';
  if ($NumOfNonElements) {
    $IgnoreMode = 'ContainsNonElementalData';
  }
  elsif (!$NumOfElements) {
    $IgnoreMode = 'ContainsNoElementalData';
  }

  return 0 unless $IgnoreMode;

  ProcessIgnoredCompound($IgnoreMode, $CmpdCount, $Molecule);
  return 1;
}
|
|
199
|
|
# Print summary statistics for fingerprints generation to STDOUT: total number
# of compounds, number processed successfully, and number ignored...
#
sub WriteFingerprintsGenerationSummaryStatistics {
  my($CmpdCount, $IgnoredCmpdCount) = @_;

  # Everything not ignored counts as processed successfully...
  my $ProcessedCmpdCount = $CmpdCount - $IgnoredCmpdCount;

  print "\nNumber of compounds: $CmpdCount\n";
  print "Number of compounds processed successfully during fingerprints generation: $ProcessedCmpdCount\n";
  print "Number of compounds ignored during fingerprints generation: $IgnoredCmpdCount\n";
}
|
|
212
|
|
# Append atom pair value IDs to fingerprint label...
#
# Nothing is done for ArbitrarySize atom pairs set or when fingerprints label
# mode is not FingerprintsLabelWithIDs.
#
# This function is called once for each input file. The label without value IDs
# is saved on the first call and used as the starting point on every call, so
# value IDs are not appended repeatedly to $OptionsInfo{FingerprintsLabel}
# during processing of multiple input files.
#
sub SetupFingerprintsLabelValueIDs {
  my($TopologicalPharmacophoreAtomPairsFingerprints) = @_;

  if ($OptionsInfo{AtomPairsSetSizeToUse} =~ /^ArbitrarySize$/i ||
    $OptionsInfo{FingerprintsLabelMode} !~ /^FingerprintsLabelWithIDs$/i) {
    return;
  }

  # Save the original label before any value IDs are appended...
  if (!exists $OptionsInfo{FingerprintsLabelWithoutValueIDs}) {
    $OptionsInfo{FingerprintsLabelWithoutValueIDs} = $OptionsInfo{FingerprintsLabel};
  }

  $OptionsInfo{FingerprintsLabel} = $OptionsInfo{FingerprintsLabelWithoutValueIDs} . "; Value IDs: " . $TopologicalPharmacophoreAtomPairsFingerprints->GetFingerprintsVector->GetValueIDsString();
}
|
|
225
|
|
# Create and open fingerprints file IO objects for the output types requested
# for the input file at the specified index...
#
# Returns a list containing SD, text and FP file IO objects, in that order; the
# entry for an output type which was not requested is undef.
#
sub SetupAndOpenOutputFiles {
  my($FileIndex) = @_;
  my($NewFPSDFile, $NewFPFile, $NewFPTextFile, $SDFileIO, $TextFileIO, $FPFileIO, %CommonParams);

  ($SDFileIO, $TextFileIO, $FPFileIO) = (undef, undef, undef);

  # Parameters shared by all fingerprints file IO objects...
  %CommonParams = ('Mode' => 'Write', 'Overwrite' => $OptionsInfo{OverwriteFiles}, 'FingerprintsStringMode' => 'FingerprintsVectorString', 'VectorStringFormat' => $OptionsInfo{VectorStringFormat});

  # SD output...
  if ($OptionsInfo{SDOutput}) {
    $NewFPSDFile = $SDFilesInfo{SDOutFileNames}[$FileIndex];
    print "Generating SD file $NewFPSDFile...\n";

    $SDFileIO = FileIO::FingerprintsSDFileIO->new('Name' => $NewFPSDFile, %CommonParams, 'FingerprintsFieldLabel' => $OptionsInfo{FingerprintsLabel});
    $SDFileIO->Open();
  }

  # FP output...
  if ($OptionsInfo{FPOutput}) {
    $NewFPFile = $SDFilesInfo{FPOutFileNames}[$FileIndex];
    print "Generating FP file $NewFPFile...\n";

    $FPFileIO = FileIO::FingerprintsFPFileIO->new('Name' => $NewFPFile, %CommonParams);
    $FPFileIO->Open();
  }

  # Text output, including column labels for data fields and fingerprints...
  if ($OptionsInfo{TextOutput}) {
    my($ColLabelsRef);

    $NewFPTextFile = $SDFilesInfo{TextOutFileNames}[$FileIndex];
    $ColLabelsRef = SetupFPTextFileCoulmnLabels($FileIndex);
    print "Generating text file $NewFPTextFile...\n";

    $TextFileIO = FileIO::FingerprintsTextFileIO->new('Name' => $NewFPTextFile, %CommonParams, 'DataColLabels' => $ColLabelsRef, 'OutDelim' => $OptionsInfo{OutDelim}, 'OutQuote' => $OptionsInfo{OutQuote});
    $TextFileIO->Open();
  }

  return ($SDFileIO, $TextFileIO, $FPFileIO);
}
|
|
265
|
|
# Write fingerprints and other data to appropriate output files...
#
# Each of the three file IO objects may be undef, in which case the
# corresponding output is skipped.
#
sub WriteDataToOutputFiles {
  my($FileIndex, $CmpdCount, $Molecule, $TopologicalPharmacophoreAtomPairsFingerprints, $NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO) = @_;
  my($DataFieldsRef);

  # Data field values are needed only for text and FP output...
  $DataFieldsRef = ($NewFPTextFileIO || $NewFPFileIO) ? $Molecule->GetDataFieldLabelAndValues() : undef;

  # SD output: fingerprints along with the input compound string...
  if ($NewFPSDFileIO) {
    my $CmpdString = $Molecule->GetInputMoleculeString();
    $NewFPSDFileIO->WriteFingerprints($TopologicalPharmacophoreAtomPairsFingerprints, $CmpdString);
  }

  # Text output: fingerprints along with data column values...
  if ($NewFPTextFileIO) {
    my $ColValuesRef = SetupFPTextFileCoulmnValues($FileIndex, $CmpdCount, $Molecule, $DataFieldsRef);
    $NewFPTextFileIO->WriteFingerprints($TopologicalPharmacophoreAtomPairsFingerprints, $ColValuesRef);
  }

  # FP output: fingerprints along with compound ID...
  if ($NewFPFileIO) {
    my $CompoundID = SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldsRef);
    $NewFPFileIO->WriteFingerprints($TopologicalPharmacophoreAtomPairsFingerprints, $CompoundID);
  }
}
|
|
298
|
|
# Generate appropriate column labels for FPText output file...
#
# Data field labels, selected according to $OptionsInfo{DataFieldsMode}, come
# first and are followed by the fingerprints label. Returns a reference to the
# list of labels.
#
sub SetupFPTextFileCoulmnLabels {
  my($FileIndex) = @_;
  my($Mode, @DataLabels);

  $Mode = $OptionsInfo{DataFieldsMode};

  @DataLabels = ();
  if ($Mode =~ /^All$/i) {
    @DataLabels = @{$SDFilesInfo{AllDataFieldsRef}[$FileIndex]};
  }
  elsif ($Mode =~ /^Common$/i) {
    @DataLabels = @{$SDFilesInfo{CommonDataFieldsRef}[$FileIndex]};
  }
  elsif ($Mode =~ /^Specify$/i) {
    @DataLabels = @{$OptionsInfo{SpecifiedDataFields}};
  }
  elsif ($Mode =~ /^CompoundID$/i) {
    @DataLabels = ($OptionsInfo{CompoundIDLabel});
  }

  # Fingerprints label is always the last column...
  return [@DataLabels, $OptionsInfo{FingerprintsLabel}];
}
|
|
323
|
|
# Generate data column values for a compound in FPText output file...
#
# For CompoundID mode, the only value is the compound ID. For All, Common and
# Specify modes, values are retrieved from compound data fields in the order of
# the corresponding labels, with an empty string used for a missing data field.
# Returns a reference to the list of values.
#
sub SetupFPTextFileCoulmnValues {
  my($FileIndex, $CmpdCount, $Molecule, $DataFieldLabelAndValuesRef) = @_;
  my($Mode, $LabelsRef, @Values);

  $Mode = $OptionsInfo{DataFieldsMode};

  if ($Mode =~ /^CompoundID$/i) {
    return [SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef)];
  }

  # Pick the list of data field labels to retrieve values for...
  $LabelsRef = undef;
  if ($Mode =~ /^All$/i) {
    $LabelsRef = $SDFilesInfo{AllDataFieldsRef}[$FileIndex];
  }
  elsif ($Mode =~ /^Common$/i) {
    $LabelsRef = $SDFilesInfo{CommonDataFieldsRef}[$FileIndex];
  }
  elsif ($Mode =~ /^Specify$/i) {
    $LabelsRef = $OptionsInfo{SpecifiedDataFields};
  }

  @Values = ();
  if (defined $LabelsRef) {
    @Values = map { exists $DataFieldLabelAndValuesRef->{$_} ? $DataFieldLabelAndValuesRef->{$_} : ''} @{$LabelsRef};
  }

  return \@Values;
}
|
|
346
|
|
# Generate compound ID for FP and FPText output files...
#
# The ID is generated according to $OptionsInfo{CompoundIDMode}:
#
#   MolNameOrLabelPrefix - molecule name or, for a name evaluating to false,
#                          $OptionsInfo{CompoundID} prefix followed by compound count
#   LabelPrefix - $OptionsInfo{CompoundID} prefix followed by compound count
#   DataField - value of data field named by $OptionsInfo{CompoundID} or empty
#               string for a missing data field
#   MolName - molecule name
#
# An empty string is returned for any other mode.
#
sub SetupCmpdIDForOutputFiles {
  my($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef) = @_;
  my($Mode, $CmpdID);

  $Mode = $OptionsInfo{CompoundIDMode};

  if ($Mode =~ /^MolNameOrLabelPrefix$/i) {
    my $MolName = $Molecule->GetName();
    return $MolName if $MolName;
    return "$OptionsInfo{CompoundID}${CmpdCount}";
  }

  if ($Mode =~ /^LabelPrefix$/i) {
    return "$OptionsInfo{CompoundID}${CmpdCount}";
  }

  if ($Mode =~ /^DataField$/i) {
    my $SpecifiedDataField = $OptionsInfo{CompoundID};
    return '' unless exists $DataFieldLabelAndValuesRef->{$SpecifiedDataField};
    return $DataFieldLabelAndValuesRef->{$SpecifiedDataField};
  }

  if ($Mode =~ /^MolName$/i) {
    $CmpdID = $Molecule->GetName();
    return $CmpdID;
  }

  return '';
}
|
|
372
|
|
# Generate fingerprints for molecule...
#
# Returns the fingerprints object or undef when ring detection or fingerprints
# generation doesn't succeed.
#
sub GenerateMoleculeFingerprints {
  my($Molecule) = @_;
  my($Fingerprints, @FingerprintsParams);

  # Prepare molecule: largest component, rings and aromaticity...
  $Molecule->KeepLargestComponent() if $OptionsInfo{KeepLargestComponent};
  return undef unless $Molecule->DetectRings();

  $Molecule->SetAromaticityModel($OptionsInfo{AromaticityModel});
  $Molecule->DetectAromaticity();

  # Parameters common to fingerprints generation with and without fuzzification...
  @FingerprintsParams = ('Molecule' => $Molecule, 'AtomPairsSetSizeToUse' => $OptionsInfo{AtomPairsSetSizeToUse}, 'MinDistance' => $OptionsInfo{MinDistance}, 'MaxDistance' => $OptionsInfo{MaxDistance}, 'AtomTypesToUse' => \@{$OptionsInfo{AtomTypesToUse}}, 'NormalizationMethodology' => $OptionsInfo{NormalizationMethodology}, 'ValuesPrecision' => $OptionsInfo{ValuesPrecision});

  # Fuzzification parameters are passed only during fuzzification of atom pairs count...
  if ($OptionsInfo{FuzzifyAtomPairsCount}) {
    push @FingerprintsParams, ('FuzzifyAtomPairsCount' => $OptionsInfo{FuzzifyAtomPairsCount}, 'FuzzificationMode' => $OptionsInfo{FuzzificationMode}, 'FuzzificationMethodology' => $OptionsInfo{FuzzificationMethodology}, 'FuzzFactor' => $OptionsInfo{FuzzFactor});
  }

  $Fingerprints = Fingerprints::TopologicalPharmacophoreAtomPairsFingerprints->new(@FingerprintsParams);

  # Set atom types weights...
  if ($OptionsInfo{UseAtomTypesWeight}) {
    $Fingerprints->SetAtomTypesWeight(%{$OptionsInfo{AtomTypesWeight}});
  }

  # Generate fingerprints and make sure generation is successful...
  $Fingerprints->GenerateFingerprints();
  return undef unless $Fingerprints->IsFingerprintsGenerationSuccessful();

  return $Fingerprints;
}
|
|
410
|
|
# Retrieve information about SD files...
#
# For each file in @SDFilesList, the following is recorded in %SDFilesInfo at
# the index of the file: FileOkay flag, output file root, SD/FP/text output file
# names, and references to all and common data field labels.
#
# A file is ignored, with a warning and FileOkay set to 0, when it doesn't
# exist, is not a SD file, is missing the data field needed for compound IDs,
# would be overwritten by its own SD output file, or has output files which
# already exist and are not to be overwritten.
#
sub RetrieveSDFilesInfo {
  my($SDFile, $Index, $FileDir, $FileExt, $FileName, $OutFileRoot, $TextOutFileExt, $SDOutFileExt, $FPOutFileExt, $NewSDFileName, $NewFPFileName, $NewTextFileName, $CheckDataField, $CollectDataFields, $AllDataFieldsRef, $CommonDataFieldsRef);

  %SDFilesInfo = ();
  @{$SDFilesInfo{FileOkay}} = ();
  @{$SDFilesInfo{OutFileRoot}} = ();
  @{$SDFilesInfo{SDOutFileNames}} = ();
  @{$SDFilesInfo{FPOutFileNames}} = ();
  @{$SDFilesInfo{TextOutFileNames}} = ();
  @{$SDFilesInfo{AllDataFieldsRef}} = ();
  @{$SDFilesInfo{CommonDataFieldsRef}} = ();

  # Data field is checked only when it's used as compound ID in text output; data
  # field labels are collected only when all or common data fields go into text output...
  $CheckDataField = ($OptionsInfo{TextOutput} && ($OptionsInfo{DataFieldsMode} =~ /^CompoundID$/i) && ($OptionsInfo{CompoundIDMode} =~ /^DataField$/i)) ? 1 : 0;
  $CollectDataFields = ($OptionsInfo{TextOutput} && ($OptionsInfo{DataFieldsMode} =~ /^(All|Common)$/i)) ? 1 : 0;

  FILELIST: for $Index (0 .. $#SDFilesList) {
    $SDFile = $SDFilesList[$Index];

    # Mark the file as not okay until all checks have passed...
    $SDFilesInfo{FileOkay}[$Index] = 0;
    $SDFilesInfo{OutFileRoot}[$Index] = '';
    $SDFilesInfo{SDOutFileNames}[$Index] = '';
    $SDFilesInfo{FPOutFileNames}[$Index] = '';
    $SDFilesInfo{TextOutFileNames}[$Index] = '';

    if (!(-e $SDFile)) {
      warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
      next FILELIST;
    }
    if (!CheckFileType($SDFile, "sd sdf")) {
      warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
      next FILELIST;
    }

    if ($CheckDataField) {
      # Make sure data field exists in SD file..
      my($CmpdString, $SpecifiedDataField, @CmpdLines, %DataFieldValues);

      @CmpdLines = ();
      # Three-argument open: file name is never interpreted as an open mode or a command...
      open SDFILE, '<', $SDFile or die "Error: Couldn't open $SDFile: $! \n";
      $CmpdString = ReadCmpdString(\*SDFILE);
      close SDFILE;
      @CmpdLines = split "\n", $CmpdString;
      %DataFieldValues = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
      $SpecifiedDataField = $OptionsInfo{CompoundID};
      if (!exists $DataFieldValues{$SpecifiedDataField}) {
        warn "Warning: Ignoring file $SDFile: Data field value, $SpecifiedDataField, using \"--CompoundID\" option in \"DataField\" \"--CompoundIDMode\" doesn't exist\n";
        next FILELIST;
      }
    }

    $AllDataFieldsRef = '';
    $CommonDataFieldsRef = '';
    if ($CollectDataFields) {
      my($CmpdCount);
      open SDFILE, '<', $SDFile or die "Error: Couldn't open $SDFile: $! \n";
      ($CmpdCount, $AllDataFieldsRef, $CommonDataFieldsRef) = GetAllAndCommonCmpdDataHeaderLabels(\*SDFILE);
      close SDFILE;
    }

    # Setup output file names...
    $FileDir = ""; $FileName = ""; $FileExt = "";
    ($FileDir, $FileName, $FileExt) = ParseFileName($SDFile);

    $TextOutFileExt = "csv";
    if ($OptionsInfo{OutDelim} =~ /^tab$/i) {
      $TextOutFileExt = "tsv";
    }
    $SDOutFileExt = $FileExt;
    $FPOutFileExt = "fpf";

    # Specified output file root is used only for a single input file...
    if ($OptionsInfo{OutFileRoot} && (@SDFilesList == 1)) {
      my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
      if ($RootFileName && $RootFileExt) {
        $FileName = $RootFileName;
      }
      else {
        $FileName = $OptionsInfo{OutFileRoot};
      }
      $OutFileRoot = $FileName;
    }
    else {
      $OutFileRoot = "${FileName}TopologicalPharmacophoreAtomPairsFP";
    }

    $NewSDFileName = "${OutFileRoot}.${SDOutFileExt}";
    $NewFPFileName = "${OutFileRoot}.${FPOutFileExt}";
    $NewTextFileName = "${OutFileRoot}.${TextOutFileExt}";

    if ($OptionsInfo{SDOutput}) {
      # Output file name is matched literally: \Q...\E keeps characters such as
      # '.', '+' or '(' in file names from being treated as regex metacharacters...
      if ($SDFile =~ /\Q$NewSDFileName\E/i) {
        warn "Warning: Ignoring input file $SDFile: Same output, $NewSDFileName, and input file names.\n";
        print "Specify a different name using \"-r --root\" option or use default name.\n";
        next FILELIST;
      }
    }

    if (!$OptionsInfo{OverwriteFiles}) {
      # Check SD, FP and text output files...
      if ($OptionsInfo{SDOutput}) {
        if (-e $NewSDFileName) {
          warn "Warning: Ignoring file $SDFile: The file $NewSDFileName already exists\n";
          next FILELIST;
        }
      }
      if ($OptionsInfo{FPOutput}) {
        if (-e $NewFPFileName) {
          warn "Warning: Ignoring file $SDFile: The file $NewFPFileName already exists\n";
          next FILELIST;
        }
      }
      if ($OptionsInfo{TextOutput}) {
        if (-e $NewTextFileName) {
          warn "Warning: Ignoring file $SDFile: The file $NewTextFileName already exists\n";
          next FILELIST;
        }
      }
    }

    $SDFilesInfo{FileOkay}[$Index] = 1;

    $SDFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
    $SDFilesInfo{SDOutFileNames}[$Index] = $NewSDFileName;
    $SDFilesInfo{FPOutFileNames}[$Index] = $NewFPFileName;
    $SDFilesInfo{TextOutFileNames}[$Index] = $NewTextFileName;

    $SDFilesInfo{AllDataFieldsRef}[$Index] = $AllDataFieldsRef;
    $SDFilesInfo{CommonDataFieldsRef}[$Index] = $CommonDataFieldsRef;
  }
}
|
|
543
|
|
# Process option values...
#
# Resets %OptionsInfo and fills it in using values from %Options: atom types and
# their weights, compound ID and data fields settings, Yes/No options converted
# into 1/0 flags, output types, and vector string format.
#
# Dies on a missing value for "--CompoundID" in DataField mode, a missing value
# for "--DataFields" in Specify mode, or a vector string format which is not
# valid for the specified atom pairs set size.
#
sub ProcessOptions {
  %OptionsInfo = ();

  ProcessAtomTypesToUseOption();
  ProcessAtomTypesWeightOption();

  $OptionsInfo{AromaticityModel} = $Options{aromaticitymodel};

  $OptionsInfo{AtomPairsSetSizeToUse} = $Options{atompairssetsizetouse};

  $OptionsInfo{CompoundIDMode} = $Options{compoundidmode};
  $OptionsInfo{CompoundIDLabel} = $Options{compoundidlabel};
  $OptionsInfo{DataFieldsMode} = $Options{datafieldsmode};

  my(@SpecifiedDataFields);
  @SpecifiedDataFields = ();

  @{$OptionsInfo{SpecifiedDataFields}} = ();
  $OptionsInfo{CompoundID} = '';

  if ($Options{datafieldsmode} =~ /^CompoundID$/i) {
    if ($Options{compoundidmode} =~ /^DataField$/i) {
      if (!$Options{compoundid}) {
        die "Error: You must specify a value for \"--CompoundID\" option in \"DataField\" \"--CompoundIDMode\". \n";
      }
      $OptionsInfo{CompoundID} = $Options{compoundid};
    }
    elsif ($Options{compoundidmode} =~ /^(LabelPrefix|MolNameOrLabelPrefix)$/i) {
      # Default label prefix is used when none is specified...
      $OptionsInfo{CompoundID} = $Options{compoundid} ? $Options{compoundid} : 'Cmpd';
    }
  }
  elsif ($Options{datafieldsmode} =~ /^Specify$/i) {
    if (!$Options{datafields}) {
      die "Error: You must specify a value for \"--DataFields\" option in \"Specify\" \"-d, --DataFieldsMode\". \n";
    }
    @SpecifiedDataFields = split /\,/, $Options{datafields};
    push @{$OptionsInfo{SpecifiedDataFields}}, @SpecifiedDataFields;
  }

  $OptionsInfo{Filter} = ($Options{filter} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{FingerprintsLabelMode} = $Options{fingerprintslabelmode};
  $OptionsInfo{FingerprintsLabel} = $Options{fingerprintslabel} ? $Options{fingerprintslabel} : 'TopologicalPharmacophoreAtomPairsFingerprints';

  $OptionsInfo{FuzzifyAtomPairsCount} = ($Options{fuzzifyatompairscount} =~ /^Yes$/i) ? 1 : 0;
  $OptionsInfo{FuzzificationMode} = $Options{fuzzificationmode};
  $OptionsInfo{FuzzificationMethodology} = $Options{fuzzificationmethodology};
  $OptionsInfo{FuzzFactor} = $Options{fuzzfactor};

  $OptionsInfo{KeepLargestComponent} = ($Options{keeplargestcomponent} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{MinDistance} = $Options{mindistance};
  $OptionsInfo{MaxDistance} = $Options{maxdistance};

  $OptionsInfo{NormalizationMethodology} = $Options{normalizationmethodology};

  $OptionsInfo{Output} = $Options{output};
  $OptionsInfo{SDOutput} = ($Options{output} =~ /^(SD|All)$/i) ? 1 : 0;
  $OptionsInfo{FPOutput} = ($Options{output} =~ /^(FP|All)$/i) ? 1 : 0;
  $OptionsInfo{TextOutput} = ($Options{output} =~ /^(Text|All)$/i) ? 1 : 0;

  $OptionsInfo{OutDelim} = $Options{outdelim};
  $OptionsInfo{OutQuote} = ($Options{quote} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
  $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;

  $OptionsInfo{ValuesPrecision} = $Options{valuesprecision};

  # Setup default vector string format...
  my($VectorStringFormat);
  $VectorStringFormat = '';

  if ($Options{vectorstringformat}) {
    $VectorStringFormat = $Options{vectorstringformat};

    if ($Options{atompairssetsizetouse} =~ /^ArbitrarySize$/i && $VectorStringFormat =~ /^ValuesString$/i) {
      die "Error: The value specified, $Options{vectorstringformat}, for option \"-v, --VectorStringFormat\" is not valid for $Options{atompairssetsizetouse} value of \"--AtomPairsSetSizeToUse\" option. Allowed values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString or ValuesAndIDsPairsString\n";
    }
  }
  else {
    # Atom pairs set size is matched case insensitively, like everywhere else in
    # this script, so a value such as "fixedsize" also gets ValuesString by default...
    $VectorStringFormat = ($Options{atompairssetsizetouse} =~ /^FixedSize$/i) ? "ValuesString" : "IDsAndValuesString";
  }
  $OptionsInfo{VectorStringFormat} = $VectorStringFormat;
}
|
|
630
|
|
# Process atom types to use option...
#
# Splits the comma delimited value of "-a, --AtomTypesToUse" option, after
# removing any spaces, and stores the atom types in @{$OptionsInfo{AtomTypesToUse}}.
#
# Dies on an empty option value or an atom type which is not an available
# functional class.
#
sub ProcessAtomTypesToUseOption {
  my($AtomType, $SpecifiedAtomTypesToUse, @AtomTypesWords);

  @{$OptionsInfo{AtomTypesToUse}} = ();
  if (IsEmpty($Options{atomtypestouse})) {
    die "Error: Atom types value specified using \"-a, --AtomTypesToUse\" option is empty\n";
  }

  $SpecifiedAtomTypesToUse = $Options{atomtypestouse};
  $SpecifiedAtomTypesToUse =~ s/ //g;
  @AtomTypesWords = split /\,/, $SpecifiedAtomTypesToUse;

  for $AtomType (@AtomTypesWords) {
    if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($AtomType)) {
      # Message ends with a newline to keep die from appending script name and line number...
      die "Error: Atomic type specified, $AtomType, using \"-a, --AtomTypesToUse\" option is not valid...\n";
    }
    push @{$OptionsInfo{AtomTypesToUse}}, $AtomType;
  }
}
|
|
652
|
|
# Process atom types weight option...
#
# Sets $OptionsInfo{UseAtomTypesWeight} to 0 for None value of "--AtomTypesWeight"
# option. Otherwise, sets it to 1 and stores the weights, specified as comma
# delimited atom type and weight pairs, in %{$OptionsInfo{AtomTypesWeight}}.
#
# Dies on an empty option value, an odd number of values, an atom type which is
# not an available functional class, or a weight which is not a real number >= 0.
#
sub ProcessAtomTypesWeightOption {
  my($Index, $AtomType, $AtomTypeWeight, $SpecifiedAtomTypesWeight, @AtomTypesWeightsPairs);

  %{$OptionsInfo{AtomTypesWeight}} = ();

  if (IsEmpty($Options{atomtypesweight})) {
    die "Error: Atom types weight value specified using \"--AtomTypesWeight\" option is empty\n";
  }
  $OptionsInfo{UseAtomTypesWeight} = ($Options{atomtypesweight} =~ /^None$/i) ? 0 : 1;
  if (!$OptionsInfo{UseAtomTypesWeight}) {
    return;
  }

  # Process specified atom type/weight pairs...
  $SpecifiedAtomTypesWeight = $Options{atomtypesweight};
  $SpecifiedAtomTypesWeight =~ s/ //g;
  @AtomTypesWeightsPairs = split /\,/, $SpecifiedAtomTypesWeight;

  if (@AtomTypesWeightsPairs % 2) {
    die "Error: Invalid number of values specified using \"--AtomTypesWeight\" option: It must contain even number of values.\n";
  }

  # Values alternate between atom type and its weight...
  for ($Index = 0; $Index < @AtomTypesWeightsPairs; $Index += 2) {
    $AtomType = $AtomTypesWeightsPairs[$Index]; $AtomTypeWeight = $AtomTypesWeightsPairs[$Index + 1];
    if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($AtomType)) {
      # Message ends with a newline to keep die from appending script name and line number...
      die "Error: Atom type specified, $AtomType, using \"--AtomTypesWeight\" option is not valid\n";
    }
    if (!(IsFloat($AtomTypeWeight) && $AtomTypeWeight >= 0)) {
      die "Error: Atom type weight specified, $AtomTypeWeight, using option \"--AtomTypesWeight\" is not valid. Allowed values: real numbers >= 0 \n";
    }
    $OptionsInfo{AtomTypesWeight}{$AtomType} = $AtomTypeWeight;
  }
}
|
|
688
|
|
689 # Setup script usage and retrieve command line arguments specified using various options...
|
|
690 sub SetupScriptUsage {
|
|
691
|
|
692 # Retrieve all the options...
|
|
693 %Options = ();
|
|
694
|
|
695 $Options{aromaticitymodel} = 'MayaChemToolsAromaticityModel';
|
|
696
|
|
697 $Options{atompairssetsizetouse} = 'ArbitrarySize';
|
|
698
|
|
699 $Options{atomtypestouse} = 'HBD,HBA,PI,NI,H';
|
|
700 $Options{atomtypesweight} = 'None';
|
|
701
|
|
702 $Options{compoundidmode} = 'LabelPrefix';
|
|
703 $Options{compoundidlabel} = 'CompoundID';
|
|
704 $Options{datafieldsmode} = 'CompoundID';
|
|
705
|
|
706 $Options{filter} = 'Yes';
|
|
707
|
|
708 $Options{fingerprintslabelmode} = 'FingerprintsLabelOnly';
|
|
709
|
|
710 $Options{fuzzifyatompairscount} = 'No';
|
|
711 $Options{fuzzificationmode} = 'AfterNormalization';
|
|
712 $Options{fuzzificationmethodology} = 'FuzzyBinning';
|
|
713 $Options{fuzzfactor} = 0.15;
|
|
714
|
|
715 $Options{keeplargestcomponent} = 'Yes';
|
|
716
|
|
717 $Options{mindistance} = 1;
|
|
718 $Options{maxdistance} = 10;
|
|
719
|
|
720 $Options{normalizationmethodology} = 'None';
|
|
721
|
|
722 $Options{output} = 'text';
|
|
723 $Options{outdelim} = 'comma';
|
|
724 $Options{quote} = 'yes';
|
|
725
|
|
726 $Options{valuesprecision} = 2;
|
|
727
|
|
728 $Options{vectorstringformat} = '';
|
|
729
|
|
730 if (!GetOptions(\%Options, "aromaticitymodel=s", "atompairssetsizetouse=s", "atomtypestouse|a=s", "atomtypesweight=s", "compoundid=s", "compoundidlabel=s", "compoundidmode=s", "datafields=s", "datafieldsmode|d=s", "filter|f=s", "fingerprintslabelmode=s", "fingerprintslabel=s", "fuzzifyatompairscount=s", "fuzzificationmode=s", "fuzzificationmethodology=s", "fuzzfactor=s", "help|h", "keeplargestcomponent|k=s", "mindistance=s", "maxdistance=s", "normalizationmethodology|n=s", "outdelim=s", "output=s", "overwrite|o", "quote|q=s", "root|r=s", "valuesprecision=s", "vectorstringformat|v=s", "workingdir|w=s")) {
|
|
731 die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
|
|
732 }
|
|
733 if ($Options{workingdir}) {
|
|
734 if (! -d $Options{workingdir}) {
|
|
735 die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
|
|
736 }
|
|
737 chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
|
|
738 }
|
|
739 if (!Molecule::IsSupportedAromaticityModel($Options{aromaticitymodel})) {
|
|
740 my(@SupportedModels) = Molecule::GetSupportedAromaticityModels();
|
|
741 die "Error: The value specified, $Options{aromaticitymodel}, for option \"--AromaticityModel\" is not valid. Supported aromaticity models in current release of MayaChemTools: @SupportedModels\n";
|
|
742 }
|
|
743 if ($Options{atompairssetsizetouse} !~ /^(ArbitrarySize|FixedSize)$/i) {
|
|
744 die "Error: The value specified, $Options{atompairssetsizetouse}, for option \"--AtomPairsSetSizeToUse\" is not valid. Allowed values: ArbitrarySize or FixedSize\n";
|
|
745 }
|
|
746 if ($Options{compoundidmode} !~ /^(DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i) {
|
|
747 die "Error: The value specified, $Options{compoundidmode}, for option \"--CompoundIDMode\" is not valid. Allowed values: DataField, MolName, LabelPrefix or MolNameOrLabelPrefix\n";
|
|
748 }
|
|
749 if ($Options{datafieldsmode} !~ /^(All|Common|Specify|CompoundID)$/i) {
|
|
750 die "Error: The value specified, $Options{datafieldsmode}, for option \"-d, --DataFieldsMode\" is not valid. Allowed values: All, Common, Specify or CompoundID\n";
|
|
751 }
|
|
752 if ($Options{filter} !~ /^(Yes|No)$/i) {
|
|
753 die "Error: The value specified, $Options{filter}, for option \"-f, --Filter\" is not valid. Allowed values: Yes or No\n";
|
|
754 }
|
|
755 if ($Options{fingerprintslabelmode} !~ /^(FingerprintsLabelOnly|FingerprintsLabelWithIDs)$/i) {
|
|
756 die "Error: The value specified, $Options{fingerprintslabelmode}, for option \"--FingerprintsLabelMode\" is not valid. Allowed values: FingerprintsLabelOnly or FingerprintsLabelWithIDs\n";
|
|
757 }
|
|
758 if ($Options{fuzzifyatompairscount} !~ /^(Yes|No)$/i) {
|
|
759 die "Error: The value specified, $Options{fuzzifyatompairscount}, for option \"--FuzzifyAtomPairsCount\" is not valid. Allowed values: Yes or No\n";
|
|
760 }
|
|
761 if ($Options{fuzzificationmode} !~ /^(BeforeNormalization|AfterNormalization)$/i) {
|
|
762 die "Error: The value specified, $Options{fuzzificationmode}, for option \"--FuzzificationMode\" is not valid. Allowed values: BeforeNormalization or AfterNormalization\n";
|
|
763 }
|
|
764 if ($Options{fuzzificationmethodology} !~ /^(FuzzyBinning|FuzzyBinSmoothing)$/i) {
|
|
765 die "Error: The value specified, $Options{fuzzificationmethodology}, for option \"--FuzzificationMethodology\" is not valid. Allowed values: FuzzyBinning or FuzzyBinSmoothing\n";
|
|
766 }
|
|
767 if (!IsFloat($Options{fuzzfactor})) {
|
|
768 die "Error: The value specified, $Options{fuzzfactor}, for option \"--FuzzFactor\" is not valid. Allowed values: real numbers >= 0 \n";
|
|
769 }
|
|
770 if ($Options{fuzzificationmethodology} !~ /^FuzzyBinning$/i) {
|
|
771 if (!($Options{fuzzfactor} >=0 && $Options{fuzzfactor} <= 1.0)) {
|
|
772 die "Error: The value specified, $Options{fuzzfactor}, for option \"--FuzzFactor\" during FuzzyBinning \"--FuzzificationMethodology\" is not valid. Allowed values: >= 0 and <= 1 \n";
|
|
773 }
|
|
774 }
|
|
775 elsif ($Options{fuzzificationmethodology} !~ /^FuzzyBinSmoothing$/i) {
|
|
776 if (!($Options{fuzzfactor} >=0 && $Options{fuzzfactor} <= 0.5)) {
|
|
777 die "Error: The value specified, $Options{fuzzfactor}, for option \"--FuzzFactor\" during FuzzyBinSmoothing \"--FuzzificationMethodology\" is not valid. Allowed values: >= 0 and <= 0.5 \n";
|
|
778 }
|
|
779 }
|
|
780 if ($Options{keeplargestcomponent} !~ /^(Yes|No)$/i) {
|
|
781 die "Error: The value specified, $Options{keeplargestcomponent}, for option \"-k, --KeepLargestComponent\" is not valid. Allowed values: Yes or No\n";
|
|
782 }
|
|
783 if (!IsInteger($Options{mindistance})) {
|
|
784 die "Error: The value specified, $Options{mindistance}, for option \"--MinDistance\" is not valid. Allowed values: >= 0 \n";
|
|
785 }
|
|
786 if (!IsPositiveInteger($Options{maxdistance})) {
|
|
787 die "Error: The value specified, $Options{maxdistance}, for option \"--MaxDistance\" is not valid. Allowed values: > 0 \n";
|
|
788 }
|
|
789 if ($Options{mindistance} > $Options{maxdistance}) {
|
|
790 die "Error: The value specified, specified, $Options{mindistance}, for option \"--MinDistance\" must be less than the value specified, $Options{maxdistance}, for option \"--MaxDistance\" \n";
|
|
791 }
|
|
792 if ($Options{normalizationmethodology} !~ /^(None|ByHeavyAtomsCount|ByAtomTypesCount)$/i) {
|
|
793 die "Error: The value specified, $Options{normalizationmethodology}, for option \"--NormalizationMethodology\" is not valid. Allowed values: None, ByHeavyAtomsCount, or ByAtomTypesCount\n";
|
|
794 }
|
|
795 if ($Options{output} !~ /^(SD|FP|text|all)$/i) {
|
|
796 die "Error: The value specified, $Options{output}, for option \"--output\" is not valid. Allowed values: SD, FP, text, or all\n";
|
|
797 }
|
|
798 if ($Options{outdelim} !~ /^(comma|semicolon|tab)$/i) {
|
|
799 die "Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
|
|
800 }
|
|
801 if ($Options{quote} !~ /^(Yes|No)$/i) {
|
|
802 die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: Yes or No\n";
|
|
803 }
|
|
804 if ($Options{outdelim} =~ /semicolon/i && $Options{quote} =~ /^No$/i) {
|
|
805 die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not allowed with, semicolon value of \"--outdelim\" option: Fingerprints string use semicolon as delimiter for various data fields and must be quoted.\n";
|
|
806 }
|
|
807 if (!IsPositiveInteger($Options{valuesprecision})) {
|
|
808 die "Error: The value specified, $Options{valuesprecision}, for option \"--ValuesPrecision\" is not valid. Allowed values: > 0 \n";
|
|
809 }
|
|
810 if ($Options{vectorstringformat} && $Options{vectorstringformat} !~ /^(ValuesString|IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString)$/i) {
|
|
811 die "Error: The value specified, $Options{vectorstringformat}, for option \"-v, --VectorStringFormat\" is not valid. Allowed values: ValuesString, IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString or ValuesAndIDsPairsString\n";
|
|
812 }
|
|
813 }
|
|
814
|
|
815 __END__
|
|
816
|
|
817 =head1 NAME
|
|
818
|
|
819 TopologicalPharmacophoreAtomPairsFingerprints.pl - Generate topological pharmacophore atom pairs fingerprints for SD files
|
|
820
|
|
821 =head1 SYNOPSIS
|
|
822
|
|
823 TopologicalPharmacophoreAtomPairsFingerprints.pl SDFile(s)...
|
|
824
|
|
825 TopologicalPharmacophoreAtomPairsFingerprints.pl [B<--AromaticityModel> I<AromaticityModelType>]
|
|
826 [B<--AtomPairsSetSizeToUse> I<ArbitrarySize | FixedSize>]
|
|
827 [B<-a, --AtomTypesToUse> I<"AtomType1, AtomType2...">]
|
|
828 [B<--AtomTypesWeight> I<"AtomType1, Weight1, AtomType2, Weight2...">]
|
|
829 [B<--CompoundID> I<DataFieldName or LabelPrefixString>] [B<--CompoundIDLabel> I<text>]
|
|
830 [B<--CompoundIDMode> I<DataField | MolName | LabelPrefix | MolNameOrLabelPrefix>] [B<--DataFields> I<"FieldLabel1, FieldLabel2,...">]
|
|
831 [B<-d, --DataFieldsMode> I<All | Common | Specify | CompoundID>] [B<-f, --Filter> I<Yes | No>]
|
|
832 [B<--FingerprintsLabelMode> I<FingerprintsLabelOnly | FingerprintsLabelWithIDs>] [B<--FingerprintsLabel> I<text>]
|
|
833 [B<--FuzzifyAtomPairsCount> I<Yes | No>] [B<--FuzzificationMode> I<BeforeNormalization | AfterNormalization>]
|
|
834 [B<--FuzzificationMethodology> I<FuzzyBinning | FuzzyBinSmoothing>] [B<--FuzzFactor> I<number>]
|
|
835 [B<-h, --help>] [B<-k, --KeepLargestComponent> I<Yes | No>] [B<--MinDistance> I<number>]
|
|
836 [B<--MaxDistance> I<number>] [B<-n, --NormalizationMethodology> I<None | ByHeavyAtomsCount | ByAtomTypesCount>]
|
|
837 [B<--OutDelim> I<comma | tab | semicolon>] [B<--output> I<SD | FP | text | all>] [B<-o, --overwrite>]
|
|
838 [B<-q, --quote> I<Yes | No>] [B<-r, --root> I<RootName>] [B<--ValuesPrecision> I<number>]
|
|
839 [B<-v, --VectorStringFormat> I<ValuesString | IDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString>]
|
|
840 [B<-w, --WorkingDir> dirname] SDFile(s)...
|
|
841
|
|
842 =head1 DESCRIPTION
|
|
843
|
|
844 Generate topological pharmacophore atom pairs fingerprints [ Ref 60-62, Ref 65, Ref 68 ] for
|
|
845 I<SDFile(s)> and create appropriate SD, FP or CSV/TSV text file(s) containing fingerprints vector
|
|
846 strings corresponding to molecular fingerprints.
|
|
847
|
|
848 Multiple SDFile names are separated by spaces. The valid file extensions are I<.sdf>
|
|
849 and I<.sd>. All other file names are ignored. All the SD files in a current directory
|
|
850 can be specified either by I<*.sdf> or the current directory name.
|
|
851
|
|
852 Based on the values specified for B<--AtomTypesToUse>, pharmacophore atom types are
|
|
853 assigned to all non-hydrogen atoms in a molecule and a distance matrix is generated.
|
|
854 A pharmacophore atom pairs basis set is initialized for all unique possible pairs within
|
|
855 B<--MinDistance> and B<--MaxDistance> range.
|
|
856
|
|
857 Let:
|
|
858
|
|
859 P = Valid pharmacophore atom type
|
|
860
|
|
861 Px = Pharmacophore atom type x
|
|
862 Py = Pharmacophore atom type y
|
|
863
|
|
864 Dmin = Minimum distance corresponding to number of bonds between
|
|
865 two atoms
|
|
866 Dmax = Maximum distance corresponding to number of bonds between
|
|
867 two atoms
|
|
868 D = Distance corresponding to number of bonds between two atoms
|
|
869
|
|
870 Px-Dn-Py = Pharmacophore atom pair ID for atom types Px and Py at
|
|
871 distance Dn
|
|
872
|
|
873 P = Number of pharmacophore atom types to consider
|
|
874 PPD = Number of possible unique pharmacophore atom pairs at a distance Dn
|
|
875
|
|
876 PPT = Total number of possible pharmacophore atom pairs at all distances
|
|
877 between Dmin and Dmax
|
|
878
|
|
879 Then:
|
|
880
|
|
881 PPD = (P * (P - 1))/2 + P
|
|
882
|
|
883 PPT = ((Dmax - Dmin) + 1) * ((P * (P - 1))/2 + P)
|
|
884 = ((Dmax - Dmin) + 1) * PPD
|
|
885
|
|
886 So for default values of Dmin = 1, Dmax = 10 and P = 5,
|
|
887
|
|
888 PPD = (5 * (5 - 1))/2 + 5 = 15
|
|
889 PPT = ((10 - 1) + 1) * 15 = 150
|
|
890
|
|
891 The pharmacophore atom pairs basis set includes 150 values.
|
|
892
|
|
893 The atom pair IDs correspond to:
|
|
894
|
|
895 Px-Dn-Py = Pharmacophore atom pair ID for atom types Px and Py at
|
|
896 distance Dn
|
|
897
|
|
898 For example: H-D1-H, H-D2-HBA, PI-D5-PI and so on
|
|
899
|
|
900 Using distance matrix and pharmacophore atom types, occurrence of unique pharmacophore atom
|
|
901 pairs is counted. The contribution of each atom type to atom pair interaction is optionally
|
|
902 weighted by specified B<--AtomTypesWeight> before assigning its count to appropriate distance
|
|
903 bin. Based on B<--NormalizationMethodology> option, pharmacophore atom pairs count is optionally
|
|
904 normalized. Additionally, pharmacophore atom pairs count is optionally fuzzified before or after
|
|
905 the normalization controlled by values of B<--FuzzifyAtomPairsCount>, B<--FuzzificationMode>,
|
|
906 B<--FuzzificationMethodology> and B<--FuzzFactor> options.
|
|
907
|
|
908 The final pharmacophore atom pairs count along with atom pair identifiers involving all non-hydrogen
|
|
909 atoms, with optional normalization and fuzzification, constitute pharmacophore topological atom pairs
|
|
910 fingerprints of the molecule.
|
|
911
|
|
912 For I<ArbitrarySize> value of B<--AtomPairsSetSizeToUse> option, the fingerprint vector corresponds to
|
|
913 only those topological pharmacophore atom pairs which are present and have non-zero count. However,
|
|
914 for I<FixedSize> value of B<--AtomPairsSetSizeToUse> option, the fingerprint vector contains all possible
|
|
915 valid topological pharmacophore atom pairs with both zero and non-zero count values.
|
|
916
|
|
917 Example of I<SD> file containing topological pharmacophore atom pairs fingerprints string data:
|
|
918
|
|
919 ... ...
|
|
920 ... ...
|
|
921 $$$$
|
|
922 ... ...
|
|
923 ... ...
|
|
924 ... ...
|
|
925 41 44 0 0 0 0 0 0 0 0999 V2000
|
|
926 -3.3652 1.4499 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
927 ... ...
|
|
928 2 3 1 0 0 0 0
|
|
929 ... ...
|
|
930 M END
|
|
931 > <CmpdID>
|
|
932 Cmpd1
|
|
933
|
|
934 > <TopologicalPharmacophoreAtomPairsFingerprints>
|
|
935 FingerprintsVector;TopologicalPharmacophoreAtomPairs:ArbitrarySize:Min
|
|
936 Distance1:MaxDistance10;54;NumericalValues;IDsAndValuesString;H-D1-H H
|
|
937 -D1-NI HBA-D1-NI HBD-D1-NI H-D2-H H-D2-HBA H-D2-HBD HBA-D2-HBA HBA-D2-
|
|
938 HBD H-D3-H H-D3-HBA H-D3-HBD H-D3-NI HBA-D3-NI HBD-D3-NI H-D4-H H-D...;
|
|
939 18 1 2 1 22 12 8 1 2 18 6 3 1 1 1 22 13 6 5 7 2 28 9 5 1 1 1 36 16 10 3
|
|
940 4 1 37 10 8 1 35 10 9 3 3 1 28 7 7 4 18 16 12 5 1 2 1
|
|
941
|
|
942 $$$$
|
|
943 ... ...
|
|
944 ... ...
|
|
945
|
|
946 Example of I<FP> file containing topological pharmacophore atom pairs fingerprints string data:
|
|
947
|
|
948 #
|
|
949 # Package = MayaChemTools 7.4
|
|
950 # Release Date = Oct 21, 2010
|
|
951 #
|
|
952 # TimeStamp = Fri Mar 11 15:32:48 2011
|
|
953 #
|
|
954 # FingerprintsStringType = FingerprintsVector
|
|
955 #
|
|
956 # Description = TopologicalPharmacophoreAtomPairs:ArbitrarySize:MinDistance1:MaxDistance10
|
|
957 # VectorStringFormat = IDsAndValuesString
|
|
958 # VectorValuesType = NumericalValues
|
|
959 #
|
|
960 Cmpd1 54;H-D1-H H-D1-NI HBA-D1-NI HBD-D1-NI H-D2-H H-D2-HBA...;18 1 2...
|
|
961 Cmpd2 61;H-D1-H H-D1-NI HBA-D1-NI HBD-D1-NI H-D2-H H-D2-HBA...;5 1 2 ...
|
|
962 ... ...
|
|
963 ... ..
|
|
964
|
|
965 Example of CSV I<Text> file containing topological pharmacophore atom pairs fingerprints string data:
|
|
966
|
|
967 "CompoundID","TopologicalPharmacophoreAtomPairsFingerprints"
|
|
968 "Cmpd1","FingerprintsVector;TopologicalPharmacophoreAtomPairs:Arbitrary
|
|
969 Size:MinDistance1:MaxDistance10;54;NumericalValues;IDsAndValuesString;H
|
|
970 -D1-H H-D1-NI HBA-D1-NI HBD-D1-NI H-D2-H H-D2-HBA H-D2-HBD HBA-D2-HBA H
|
|
971 BA-D2-HBD H-D3-H H-D3-HBA H-D3-HBD H-D3-NI HBA-D3-NI HBD-D3-NI H-D4...;
|
|
972 18 1 2 1 22 12 8 1 2 18 6 3 1 1 1 22 13 6 5 7 2 28 9 5 1 1 1 36 16 10 3
|
|
973 4 1 37 10 8 1 35 10 9 3 3 1 28 7 7 4 18 16 12 5 1 2 1"
|
|
974 ... ...
|
|
975 ... ...
|
|
976
|
|
977 The current release of MayaChemTools generates the following types of topological pharmacophore
|
|
978 atom pairs fingerprints vector strings:
|
|
979
|
|
980 FingerprintsVector;TopologicalPharmacophoreAtomPairs:ArbitrarySize:Min
|
|
981 Distance1:MaxDistance10;54;NumericalValues;IDsAndValuesString;H-D1-H H
|
|
982 -D1-NI HBA-D1-NI HBD-D1-NI H-D2-H H-D2-HBA H-D2-HBD HBA-D2-HBA HBA-D2-
|
|
983 HBD H-D3-H H-D3-HBA H-D3-HBD H-D3-NI HBA-D3-NI HBD-D3-NI H-D4-H H-D4-H
|
|
984 BA H-D4-HBD HBA-D4-HBA HBA-D4-HBD HBD-D4-HBD H-D5-H H-D5-HBA H-D5-...;
|
|
985 18 1 2 1 22 12 8 1 2 18 6 3 1 1 1 22 13 6 5 7 2 28 9 5 1 1 1 36 16 10
|
|
986 3 4 1 37 10 8 1 35 10 9 3 3 1 28 7 7 4 18 16 12 5 1 2 1
|
|
987
|
|
988 FingerprintsVector;TopologicalPharmacophoreAtomPairs:FixedSize:MinDist
|
|
989 ance1:MaxDistance10;150;OrderedNumericalValues;ValuesString;18 0 0 1 0
|
|
990 0 0 2 0 0 1 0 0 0 0 22 12 8 0 0 1 2 0 0 0 0 0 0 0 0 18 6 3 1 0 0 0 1
|
|
991 0 0 1 0 0 0 0 22 13 6 0 0 5 7 0 0 2 0 0 0 0 0 28 9 5 1 0 0 0 1 0 0 1 0
|
|
992 0 0 0 36 16 10 0 0 3 4 0 0 1 0 0 0 0 0 37 10 8 0 0 0 0 1 0 0 0 0 0 0
|
|
993 0 35 10 9 0 0 3 3 0 0 1 0 0 0 0 0 28 7 7 4 0 0 0 0 0 0 0 0 0 0 0 18...
|
|
994
|
|
995 FingerprintsVector;TopologicalPharmacophoreAtomPairs:FixedSize:MinDist
|
|
996 ance1:MaxDistance10;150;OrderedNumericalValues;IDsAndValuesString;H-D1
|
|
997 -H H-D1-HBA H-D1-HBD H-D1-NI H-D1-PI HBA-D1-HBA HBA-D1-HBD HBA-D1-NI H
|
|
998 BA-D1-PI HBD-D1-HBD HBD-D1-NI HBD-D1-PI NI-D1-NI NI-D1-PI PI-D1-PI H-D
|
|
999 2-H H-D2-HBA H-D2-HBD H-D2-NI H-D2-PI HBA-D2-HBA HBA-D2-HBD HBA-D2...;
|
|
1000 18 0 0 1 0 0 0 2 0 0 1 0 0 0 0 22 12 8 0 0 1 2 0 0 0 0 0 0 0 0 18 6 3
|
|
1001 1 0 0 0 1 0 0 1 0 0 0 0 22 13 6 0 0 5 7 0 0 2 0 0 0 0 0 28 9 5 1 0 0 0
|
|
1002 1 0 0 1 0 0 0 0 36 16 10 0 0 3 4 0 0 1 0 0 0 0
|
|
1003
|
|
1004
|
|
1005 =head1 OPTIONS
|
|
1006
|
|
1007 =over 4
|
|
1008
|
|
1009 =item B<--AromaticityModel> I<MDLAromaticityModel | TriposAromaticityModel | MMFFAromaticityModel | ChemAxonBasicAromaticityModel | ChemAxonGeneralAromaticityModel | DaylightAromaticityModel | MayaChemToolsAromaticityModel>
|
|
1010
|
|
1011 Specify aromaticity model to use during detection of aromaticity. Possible values in the current
|
|
1012 release are: I<MDLAromaticityModel, TriposAromaticityModel, MMFFAromaticityModel,
|
|
1013 ChemAxonBasicAromaticityModel, ChemAxonGeneralAromaticityModel, DaylightAromaticityModel
|
|
1014 or MayaChemToolsAromaticityModel>. Default value: I<MayaChemToolsAromaticityModel>.
|
|
1015
|
|
1016 The supported aromaticity model names along with model specific control parameters
|
|
1017 are defined in B<AromaticityModelsData.csv>, which is distributed with the current release
|
|
1018 and is available under B<lib/data> directory. B<Molecule.pm> module retrieves data from
|
|
1019 this file during class instantiation and makes it available to method B<DetectAromaticity>
|
|
1020 for detecting aromaticity corresponding to a specific model.
|
|
1021
|
|
1022 =item B<--AtomPairsSetSizeToUse> I<ArbitrarySize | FixedSize>
|
|
1023
|
|
1024 Atom pairs set size to use during generation of topological pharmacophore atom pairs
|
|
1025 fingerprints.
|
|
1026
|
|
1027 Possible values: I<ArbitrarySize | FixedSize>; Default value: I<ArbitrarySize>.
|
|
1028
|
|
1029 For I<ArbitrarySize> value of B<--AtomPairsSetSizeToUse> option, the fingerprint vector
|
|
1030 corresponds to only those topological pharmacophore atom pairs which are present and
|
|
1031 have non-zero count. However, for I<FixedSize> value of B<--AtomPairsSetSizeToUse>
|
|
1032 option, the fingerprint vector contains all possible valid topological pharmacophore atom
|
|
1033 pairs with both zero and non-zero count values.
|
|
1034
|
|
1035 =item B<-a, --AtomTypesToUse> I<"AtomType1,AtomType2,...">
|
|
1036
|
|
1037 Pharmacophore atom types to use during generation of topological pharmacophore
|
|
1038 atom pairs. It's a list of comma separated valid pharmacophore atom types.
|
|
1039
|
|
1040 Possible values for pharmacophore atom types are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>.
|
|
1041 Default value [ Ref 60-62 ] : I<HBD,HBA,PI,NI,H>.
|
|
1042
|
|
1043 The pharmacophore atom types abbreviations correspond to:
|
|
1044
|
|
1045 HBD: HydrogenBondDonor
|
|
1046 HBA: HydrogenBondAcceptor
|
|
1047 PI : PositivelyIonizable
|
|
1048 NI : NegativelyIonizable
|
|
1049 Ar : Aromatic
|
|
1050 Hal : Halogen
|
|
1051 H : Hydrophobic
|
|
1052 RA : RingAtom
|
|
1053 CA : ChainAtom
|
|
1054
|
|
1055 I<AtomTypes::FunctionalClassAtomTypes> module is used to assign pharmacophore atom
|
|
1056 types. It uses following definitions [ Ref 60-61, Ref 65-66 ]:
|
|
1057
|
|
1058 HydrogenBondDonor: NH, NH2, OH
|
|
1059 HydrogenBondAcceptor: N[!H], O
|
|
1060 PositivelyIonizable: +, NH2
|
|
1061 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
|
|
1062
|
|
1063 =item B<--AtomTypesWeight> I<"AtomType1,Weight1,AtomType2,Weight2...">
|
|
1064
|
|
1065 Weights of specified pharmacophore atom types to use during calculation of their contribution
|
|
1066 to atom pair count. Default value: I<None>. Valid values: real numbers greater than 0. In general
|
|
1067 it's comma delimited list of valid atom type and its weight.
|
|
1068
|
|
1069 The weight values allow to increase the importance of specific pharmacophore atom type
|
|
1070 in the generated fingerprints. A weight value of 0 for an atom type eliminates its contribution to
|
|
1071 atom pair count whereas weight value of 2 doubles its contribution.
|
|
1072
|
|
1073 =item B<--CompoundID> I<DataFieldName or LabelPrefixString>
|
|
1074
|
|
1075 This value is B<--CompoundIDMode> specific and indicates how compound ID is generated.
|
|
1076
|
|
1077 For I<DataField> value of B<--CompoundIDMode> option, it corresponds to datafield label name
|
|
1078 whose value is used as compound ID; otherwise, it's a prefix string used for generating compound
|
|
1079 IDs like LabelPrefixString<Number>. Default value, I<Cmpd>, generates compound IDs which
|
|
1080 look like Cmpd<Number>.
|
|
1081
|
|
1082 Examples for I<DataField> value of B<--CompoundIDMode>:
|
|
1083
|
|
1084 MolID
|
|
1085 ExtReg
|
|
1086
|
|
1087 Examples for I<LabelPrefix> or I<MolNameOrLabelPrefix> value of B<--CompoundIDMode>:
|
|
1088
|
|
1089 Compound
|
|
1090
|
|
1091 The value specified above generates compound IDs which correspond to Compound<Number>
|
|
1092 instead of default value of Cmpd<Number>.
|
|
1093
|
|
1094 =item B<--CompoundIDLabel> I<text>
|
|
1095
|
|
1096 Specify compound ID column label for CSV/TSV text file(s) used during I<CompoundID> value
|
|
1097 of B<--DataFieldsMode> option. Default value: I<CompoundID>.
|
|
1098
|
|
1099 =item B<--CompoundIDMode> I<DataField | MolName | LabelPrefix | MolNameOrLabelPrefix>
|
|
1100
|
|
1101 Specify how to generate compound IDs and write to FP or CSV/TSV text file(s) along with generated
|
|
1102 fingerprints for I<FP | text | all> values of B<--output> option: use a I<SDFile(s)> datafield value;
|
|
1103 use molname line from I<SDFile(s)>; generate a sequential ID with specific prefix; use combination
|
|
1104 of both MolName and LabelPrefix with usage of LabelPrefix values for empty molname lines.
|
|
1105
|
|
1106 Possible values: I<DataField | MolName | LabelPrefix | MolNameOrLabelPrefix>.
|
|
1107 Default value: I<LabelPrefix>.
|
|
1108
|
|
1109 For I<MolNameOrLabelPrefix> value of B<--CompoundIDMode>, molname line in I<SDFile(s)> takes
|
|
1110 precedence over sequential compound IDs generated using I<LabelPrefix> and only empty molname
|
|
1111 values are replaced with sequential compound IDs.
|
|
1112
|
|
1113 This is only used for I<CompoundID> value of B<--DataFieldsMode> option.
|
|
1114
|
|
1115 =item B<--DataFields> I<"FieldLabel1,FieldLabel2,...">
|
|
1116
|
|
1117 Comma delimited list of I<SDFile(s)> data fields to extract and write to CSV/TSV text file(s) along
|
|
1118 with generated fingerprints for I<text | all> values of B<--output> option.
|
|
1119
|
|
1120 This is only used for I<Specify> value of B<--DataFieldsMode> option.
|
|
1121
|
|
1122 Examples:
|
|
1123
|
|
1124 Extreg
|
|
1125 MolID,CompoundName
|
|
1126
|
|
1127 =item B<-d, --DataFieldsMode> I<All | Common | Specify | CompoundID>
|
|
1128
|
|
1129 Specify how data fields in I<SDFile(s)> are transferred to output CSV/TSV text file(s) along
|
|
1130 with generated fingerprints for I<text | all> values of B<--output> option: transfer all SD
|
|
1131 data fields; transfer SD data fields common to all compounds; extract specified data fields;
|
|
1132 generate a compound ID using molname line, a compound prefix, or a combination of both.
|
|
1133 Possible values: I<All | Common | Specify | CompoundID>. Default value: I<CompoundID>.
|
|
1134
|
|
1135 =item B<-f, --Filter> I<Yes | No>
|
|
1136
|
|
1137 Specify whether to check and filter compound data in SDFile(s). Possible values: I<Yes or No>.
|
|
1138 Default value: I<Yes>.
|
|
1139
|
|
1140 By default, compound data is checked before calculating fingerprints and compounds containing
|
|
1141 atom data corresponding to non-element symbols or no atom data are ignored.
|
|
1142
|
|
1143 =item B<--FingerprintsLabelMode> I<FingerprintsLabelOnly | FingerprintsLabelWithIDs>
|
|
1144
|
|
1145 Specify how fingerprints label is generated in conjunction with B<--FingerprintsLabel> option value:
|
|
1146 use fingerprints label generated only by B<--FingerprintsLabel> option value or append topological
|
|
1147 atom pair count value IDs to B<--FingerprintsLabel> option value.
|
|
1148
|
|
1149 Possible values: I<FingerprintsLabelOnly | FingerprintsLabelWithIDs>. Default value:
|
|
1150 I<FingerprintsLabelOnly>.
|
|
1151
|
|
1152 Topological atom pairs IDs appended to B<--FingerprintsLabel> value during I<FingerprintsLabelWithIDs>
|
|
1153 values of B<--FingerprintsLabelMode> correspond to atom pair count values in fingerprint vector string.
|
|
1154
|
|
1155 I<FingerprintsLabelWithIDs> value of B<--FingerprintsLabelMode> is ignored during I<ArbitrarySize> value
|
|
1156 of B<--AtomPairsSetSizeToUse> option and topological atom pairs IDs are not appended to the label.
|
|
1157
|
|
1158 =item B<--FingerprintsLabel> I<text>
|
|
1159
|
|
1160 SD data label or text file column label to use for fingerprints string in output SD or
|
|
1161 CSV/TSV text file(s) specified by B<--output>. Default value: I<TopologicalPharmacophoreAtomPairsFingerprints>.
|
|
1162
|
|
1163 =item B<--FuzzifyAtomPairsCount> I<Yes | No>
|
|
1164
|
|
1165 To fuzzify or not to fuzzify atom pairs count. Possible values: I<Yes or No>. Default value:
|
|
1166 I<No>.
|
|
1167
|
|
1168 =item B<--FuzzificationMode> I<BeforeNormalization | AfterNormalization>
|
|
1169
|
|
1170 When to fuzzify atom pairs count. Possible values: I<BeforeNormalization | AfterNormalization>.
|
|
1171 Default value: I<AfterNormalization>.
|
|
1172
|
|
1173 =item B<--FuzzificationMethodology> I<FuzzyBinning | FuzzyBinSmoothing>
|
|
1174
|
|
1175 How to fuzzify atom pairs count. Possible values: I<FuzzyBinning | FuzzyBinSmoothing>.
|
|
1176 Default value: I<FuzzyBinning>.
|
|
1177
|
|
1178 In conjunction with values for options B<--FuzzifyAtomPairsCount>, B<--FuzzificationMode> and
|
|
1179 B<--FuzzFactor>, B<--FuzzificationMethodology> option is used to fuzzify pharmacophore atom
|
|
1180 pairs count.
|
|
1181
|
|
1182 Let:
|
|
1183
|
|
1184 Px = Pharmacophore atom type x
|
|
1185 Py = Pharmacophore atom type y
|
|
1186 PPxy = Pharmacophore atom pair between atom type Px and Py
|
|
1187
|
|
1188 PPxyDn = Pharmacophore atom pairs count between atom type Px and Py
|
|
1189 at distance Dn
|
|
1190 PPxyDn-1 = Pharmacophore atom pairs count between atom type Px and Py
|
|
1191 at distance Dn - 1
|
|
1192 PPxyDn+1 = Pharmacophore atom pairs count between atom type Px and Py
|
|
1193 at distance Dn + 1
|
|
1194
|
|
1195 FF = FuzzFactor for FuzzyBinning and FuzzyBinSmoothing
|
|
1196
|
|
1197 Then:
|
|
1198
|
|
1199 For I<FuzzyBinning>:
|
|
1200
|
|
1201 PPxyDn = PPxyDn (Unchanged)
|
|
1202
|
|
1203 PPxyDn-1 = PPxyDn-1 + PPxyDn * FF
|
|
1204 PPxyDn+1 = PPxyDn+1 + PPxyDn * FF
|
|
1205
|
|
1206 For I<FuzzyBinSmoothing>:
|
|
1207
|
|
1208 PPxyDn = PPxyDn - PPxyDn * 2FF for Dmin < Dn < Dmax
|
|
1209 PPxyDn = PPxyDn - PPxyDn * FF for Dn = Dmin or Dmax
|
|
1210
|
|
1211 PPxyDn-1 = PPxyDn-1 + PPxyDn * FF
|
|
1212 PPxyDn+1 = PPxyDn+1 + PPxyDn * FF
|
|
1213
|
|
1214 In both fuzzification schemes, a value of 0 for FF implies no fuzzification of occurrence counts.
|
|
1215 A value of 1 during I<FuzzyBinning> corresponds to maximum fuzzification of occurrence counts;
|
|
1216 however, a value of 0.5 during I<FuzzyBinSmoothing> ends up completely distributing the value over
|
|
1217 the previous and next distance bins.
|
|
1218
|
|
1219 So for default value of B<--FuzzFactor> (FF) 0.15, the occurrence count of pharmacophore atom pairs
|
|
1220 at distance Dn during FuzzyBinning is left unchanged and the counts at distances Dn -1 and Dn + 1
|
|
1221 are incremented by PPxyDn * 0.15.
|
|
1222
|
|
1223 And during I<FuzzyBinSmoothing> the occurrence count at distance Dn is scaled back using multiplicative
|
|
1224 factor of (1 - 2*0.15) and the occurrence counts at distances Dn -1 and Dn + 1 are incremented by
|
|
1225 PPxyDn * 0.15. In other words, occurrence bin count is smoothed out by distributing it over the
|
|
1226 previous and next distance value.
|
|
1227
|
|
1228 =item B<--FuzzFactor> I<number>
|
|
1229
|
|
1230 Specify by how much to fuzzify atom pairs count. Default value: I<0.15>. Valid values: For
|
|
1231 I<FuzzyBinning> value of B<--FuzzificationMethodology> option: I<between 0 and 1.0>; For
|
|
1232 I<FuzzyBinSmoothing> value of B<--FuzzificationMethodology> option: I<between 0 and 0.5>.
|
|
1233
|
|
1234 =item B<-h, --help>
|
|
1235
|
|
1236 Print this help message.
|
|
1237
|
|
1238 =item B<-k, --KeepLargestComponent> I<Yes | No>
|
|
1239
|
|
1240 Generate fingerprints for only the largest component in molecule. Possible values:
|
|
1241 I<Yes or No>. Default value: I<Yes>.
|
|
1242
|
|
1243 For molecules containing multiple connected components, fingerprints can be generated
|
|
1244 in two different ways: use all connected components or just the largest connected
|
|
1245 component. By default, all atoms except for the largest connected component are
|
|
1246 deleted before generation of fingerprints.
|
|
1247
|
|
1248 =item B<--MinDistance> I<number>
|
|
1249
|
|
1250 Minimum bond distance between atom pairs for generating topological pharmacophore atom
|
|
1251 pairs. Default value: I<1>. Valid values: non-negative integers less than or equal to B<--MaxDistance>.
|
|
1252
|
|
1253 =item B<--MaxDistance> I<number>
|
|
1254
|
|
1255 Maximum bond distance between atom pairs for generating topological pharmacophore atom
|
|
1256 pairs. Default value: I<10>. Valid values: positive integers greater than or equal to B<--MinDistance>.
|
|
1257
|
|
1258 =item B<-n, --NormalizationMethodology> I<None | ByHeavyAtomsCount | ByAtomTypesCount>
|
|
1259
|
|
1260 Normalization methodology to use for scaling the occurrence count of pharmacophore atom
|
|
1261 pairs within specified distance range. Possible values: I<None, ByHeavyAtomsCount or
|
|
1262 ByAtomTypesCount>. Default value: I<None>.
|
|
1263
|
|
1264 =item B<--OutDelim> I<comma | tab | semicolon>
|
|
1265
|
|
1266 Delimiter for output CSV/TSV text file(s). Possible values: I<comma, tab, or semicolon>.
|
|
1267 Default value: I<comma>.
|
|
1268
|
|
1269 =item B<--output> I<SD | FP | text | all>
|
|
1270
|
|
1271 Type of output files to generate. Possible values: I<SD, FP, text, or all>. Default value: I<text>.
|
|
1272
|
|
1273 =item B<-o, --overwrite>
|
|
1274
|
|
1275 Overwrite existing files.
|
|
1276
|
|
1277 =item B<-q, --quote> I<Yes | No>
|
|
1278
|
|
1279 Put quote around column values in output CSV/TSV text file(s). Possible values:
|
|
1280 I<Yes or No>. Default value: I<Yes>.
|
|
1281
|
|
1282 =item B<-r, --root> I<RootName>
|
|
1283
|
|
1284 New file name is generated using the root: <Root>.<Ext>. Default for new file names:
|
|
1285 <SDFileName><TopologicalPharmacophoreAtomPairsFP>.<Ext>. The file type determines <Ext> value.
|
|
1286 The sdf, fpf, csv, and tsv <Ext> values are used for SD, FP, comma/semicolon, and tab
|
|
1287 delimited text files, respectively. This option is ignored for multiple input files.
|
|
1288
|
|
1289 =item B<--ValuesPrecision> I<number>
|
|
1290
|
|
1291 Precision of atom pairs count real values which might be generated after normalization
|
|
1292 or fuzzification. Default value: up to I<2> decimal places. Valid values: positive integers.
|
|
1293
|
|
1294 =item B<-v, --VectorStringFormat> I<ValuesString | IDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString>
|
|
1295
|
|
1296 Format of fingerprints vector string data in output SD, FP or CSV/TSV text file(s) specified by
|
|
1297 B<--output> option. Possible values: I<ValuesString | IDsAndValuesString | IDsAndValuesPairsString
|
|
1298 | ValuesAndIDsString | ValuesAndIDsPairsString>.
|
|
1299
|
|
1300 Default value during I<FixedSize> value of B<--AtomPairsSetSizeToUse> option: I<ValuesString>. Default
|
|
1301 value during I<ArbitrarySize> value of B<--AtomPairsSetSizeToUse> option: I<IDsAndValuesString>.
|
|
1302
|
|
1303 I<ValuesString> option value is not allowed for I<ArbitrarySize> value of B<--AtomPairsSetSizeToUse>
|
|
1304 option.
|
|
1305
|
|
1306 Examples:
|
|
1307
|
|
1308 FingerprintsVector;TopologicalPharmacophoreAtomPairs:ArbitrarySize:Min
|
|
1309 Distance1:MaxDistance10;54;NumericalValues;IDsAndValuesString;H-D1-H H
|
|
1310 -D1-NI HBA-D1-NI HBD-D1-NI H-D2-H H-D2-HBA H-D2-HBD HBA-D2-HBA HBA-D2-
|
|
1311 HBD H-D3-H H-D3-HBA H-D3-HBD H-D3-NI HBA-D3-NI HBD-D3-NI H-D4-H H-D4-H
|
|
1312 BA H-D4-HBD HBA-D4-HBA HBA-D4-HBD HBD-D4-HBD H-D5-H H-D5-HBA H-D5-...;
|
|
1313 18 1 2 1 22 12 8 1 2 18 6 3 1 1 1 22 13 6 5 7 2 28 9 5 1 1 1 36 16 10
|
|
1314 3 4 1 37 10 8 1 35 10 9 3 3 1 28 7 7 4 18 16 12 5 1 2 1
|
|
1315
|
|
1316 FingerprintsVector;TopologicalPharmacophoreAtomPairs:FixedSize:MinDist
|
|
1317 ance1:MaxDistance10;150;OrderedNumericalValues;ValuesString;18 0 0 1 0
|
|
1318 0 0 2 0 0 1 0 0 0 0 22 12 8 0 0 1 2 0 0 0 0 0 0 0 0 18 6 3 1 0 0 0 1
|
|
1319 0 0 1 0 0 0 0 22 13 6 0 0 5 7 0 0 2 0 0 0 0 0 28 9 5 1 0 0 0 1 0 0 1 0
|
|
1320 0 0 0 36 16 10 0 0 3 4 0 0 1 0 0 0 0 0 37 10 8 0 0 0 0 1 0 0 0 0 0 0
|
|
1321 0 35 10 9 0 0 3 3 0 0 1 0 0 0 0 0 28 7 7 4 0 0 0 0 0 0 0 0 0 0 0 18...
|
|
1322
|
|
1323 FingerprintsVector;TopologicalPharmacophoreAtomPairs:FixedSize:MinDist
|
|
1324 ance1:MaxDistance10;150;OrderedNumericalValues;IDsAndValuesString;H-D1
|
|
1325 -H H-D1-HBA H-D1-HBD H-D1-NI H-D1-PI HBA-D1-HBA HBA-D1-HBD HBA-D1-NI H
|
|
1326 BA-D1-PI HBD-D1-HBD HBD-D1-NI HBD-D1-PI NI-D1-NI NI-D1-PI PI-D1-PI H-D
|
|
1327 2-H H-D2-HBA H-D2-HBD H-D2-NI H-D2-PI HBA-D2-HBA HBA-D2-HBD HBA-D2...;
|
|
1328 18 0 0 1 0 0 0 2 0 0 1 0 0 0 0 22 12 8 0 0 1 2 0 0 0 0 0 0 0 0 18 6 3
|
|
1329 1 0 0 0 1 0 0 1 0 0 0 0 22 13 6 0 0 5 7 0 0 2 0 0 0 0 0 28 9 5 1 0 0 0
|
|
1330 1 0 0 1 0 0 0 0 36 16 10 0 0 3 4 0 0 1 0 0 0 0
|
|
1331
|
|
1332 =item B<-w, --WorkingDir> I<DirName>
|
|
1333
|
|
1334 Location of working directory. Default value: current directory.
|
|
1335
|
|
1336 =back
|
|
1337
|
|
1338 =head1 EXAMPLES
|
|
1339
|
|
1340 To generate topological pharmacophore atom pairs fingerprints of arbitrary size corresponding to distances
|
|
1341 from 1 through 10 using default atom types with no weighting, normalization, and fuzzification
|
|
1342 of atom pairs count and create a SampleTPAPFP.csv file containing sequential compound IDs along
|
|
1343 with fingerprints vector strings data in IDsAndValuesString format, type:
|
|
1344
|
|
1345 % TopologicalPharmacophoreAtomPairsFingerprints.pl -r SampleTPAPFP
|
|
1346 -o Sample.sdf
|
|
1347
|
|
1348 To generate topological pharmacophore atom pairs fingerprints of fixed size corresponding to distances
|
|
1349 from 1 through 10 using default atom types with no weighting, normalization, and fuzzification
|
|
1350 of atom pairs count and create a SampleTPAPFP.csv file containing sequential compound IDs along
|
|
1351 with fingerprints vector strings data in ValuesString format, type:
|
|
1352
|
|
1353 % TopologicalPharmacophoreAtomPairsFingerprints.pl
|
|
1354 --AtomPairsSetSizeToUse FixedSize -r SampleTPAPFP -o Sample.sdf
|
|
1355
|
|
1356 To generate topological pharmacophore atom pairs fingerprints of arbitrary size corresponding to distances
|
|
1357 from 1 through 10 using default atom types with no weighting, normalization, and fuzzification
|
|
1358 of atom pairs count and create SampleTPAPFP.sdf, SampleTPAPFP.fpf and SampleTPAPFP.csv files containing
|
|
1359 sequential compound IDs in CSV file along with fingerprints vector strings data in IDsAndValuesString
|
|
1360 format, type:
|
|
1361
|
|
1362 % TopologicalPharmacophoreAtomPairsFingerprints.pl --output all
|
|
1363 -r SampleTPAPFP -o Sample.sdf
|
|
1364
|
|
1365 To generate topological pharmacophore atom pairs fingerprints of arbitrary size corresponding to distances
|
|
1366 from 1 through 10 using default atom types with no weighting, normalization, and fuzzification
|
|
1367 of atom pairs count and create a SampleTPAPFP.csv file containing sequential compound IDs along
|
|
1368 with fingerprints vector strings data in IDsAndValuesPairsString format, type:
|
|
1369
|
|
1370 % TopologicalPharmacophoreAtomPairsFingerprints.pl --VectorStringFormat
|
|
1371 IDsAndValuesPairsString -r SampleTPAPFP -o Sample.sdf
|
|
1372
|
|
1373 To generate topological pharmacophore atom pairs fingerprints of arbitrary size corresponding to distances
|
|
1374 from 1 through 6 using default atom types with no weighting, normalization, and fuzzification
|
|
1375 of atom pairs count and create a SampleTPAPFP.csv file containing sequential compound IDs along
|
|
1376 with fingerprints vector strings data in IDsAndValuesString format, type:
|
|
1377
|
|
1378 % TopologicalPharmacophoreAtomPairsFingerprints.pl --MinDistance 1
|
|
1379 --MaxDistance 6 -r SampleTPAPFP -o Sample.sdf
|
|
1380
|
|
1381 To generate topological pharmacophore atom pairs fingerprints of arbitrary size corresponding to distances
|
|
1382 from 1 through 10 using "HBD,HBA,PI,NI" atom types with double the weighting for "HBD,HBA" and
|
|
1383 normalization by HeavyAtomCount but no fuzzification of atom pairs count and create a
|
|
1384 SampleTPAPFP.csv file containing sequential compound IDs along with fingerprints vector strings
|
|
1385 data in IDsAndValuesString format, type:
|
|
1386
|
|
1387 % TopologicalPharmacophoreAtomPairsFingerprints.pl --MinDistance 1
|
|
1388 --MaxDistance 10 --AtomTypesToUse "HBD,HBA,PI, NI" --AtomTypesWeight
|
|
1389 "HBD,2,HBA,2,PI,1,NI,1" --NormalizationMethodology ByHeavyAtomsCount
|
|
1390 --FuzzifyAtomPairsCount No -r SampleTPAPFP -o Sample.sdf
|
|
1391
|
|
1392 To generate topological pharmacophore atom pairs fingerprints of arbitrary size corresponding to
|
|
1393 distances from 1 through 10 using "HBD,HBA,PI,NI,H" atom types with no weighting of atom types and
|
|
1394 normalization but with fuzzification of atom pairs count using FuzzyBinning methodology
|
|
1395 with FuzzFactor value 0.5 and create a SampleTPAPFP.csv file containing sequential compound
|
|
1396 IDs along with fingerprints vector strings data in IDsAndValuesString format, type:
|
|
1397
|
|
1398 % TopologicalPharmacophoreAtomPairsFingerprints.pl --MinDistance 1
|
|
1399 --MaxDistance 10 --AtomTypesToUse "HBD,HBA,PI, NI,H" --AtomTypesWeight
|
|
1400 "HBD,1,HBA,1,PI,1,NI,1,H,1" --NormalizationMethodology None
|
|
1401 --FuzzifyAtomPairsCount Yes --FuzzificationMethodology FuzzyBinning
|
|
1402 --FuzzFactor 0.5 -r SampleTPAPFP -o Sample.sdf
|
|
1403
|
|
1404 To generate topological pharmacophore atom pairs fingerprints of arbitrary size corresponding to distances
|
|
1405 from 1 through 10 using default atom types with no weighting,
|
|
1406 normalization, and fuzzification of atom pairs count and create a SampleTPAPFP.csv
|
|
1407 file containing compound ID from molecule name line along with fingerprints vector strings
|
|
1408 data, type:
|
|
1409
|
|
1410 % TopologicalPharmacophoreAtomPairsFingerprints.pl --DataFieldsMode
|
|
1411 CompoundID --CompoundIDMode MolName -r SampleTPAPFP -o Sample.sdf
|
|
1412
|
|
1413 To generate topological pharmacophore atom pairs fingerprints of arbitrary size corresponding
|
|
1414 to distances from 1 through 10 using default atom types with no weighting,
|
|
1415 normalization, and fuzzification of atom pairs count and create a SampleTPAPFP.csv
|
|
1416 file containing compound IDs using specified data field along with fingerprints vector strings
|
|
1417 data, type:
|
|
1418
|
|
1419 % TopologicalPharmacophoreAtomPairsFingerprints.pl --DataFieldsMode
|
|
1420 CompoundID --CompoundIDMode DataField --CompoundID Mol_ID
|
|
1421 -r SampleTPAPFP -o Sample.sdf
|
|
1422
|
|
1423 To generate topological pharmacophore atom pairs fingerprints of arbitrary size corresponding
|
|
1424 to distances from 1 through 10 using default atom types with no weighting,
|
|
1425 normalization, and fuzzification of atom pairs count and create a SampleTPAPFP.csv
|
|
1426 file containing compound ID using combination of molecule name line and an explicit compound
|
|
1427 prefix along with fingerprints vector strings data, type:
|
|
1428
|
|
1429 % TopologicalPharmacophoreAtomPairsFingerprints.pl --DataFieldsMode
|
|
1430 CompoundID --CompoundIDMode MolnameOrLabelPrefix
|
|
1431 --CompoundID Cmpd --CompoundIDLabel MolID -r SampleTPAPFP -o Sample.sdf
|
|
1432
|
|
1433 To generate topological pharmacophore atom pairs fingerprints of arbitrary size corresponding
|
|
1434 to distances from 1 through 10 using default atom types with no weighting,
|
|
1435 normalization, and fuzzification of atom pairs count and create a SampleTPAPFP.csv
|
|
1436 file containing specific data fields columns along with fingerprints vector strings
|
|
1437 data, type:
|
|
1438
|
|
1439 % TopologicalPharmacophoreAtomPairsFingerprints.pl --DataFieldsMode
|
|
1440 Specify --DataFields Mol_ID -r SampleTPAPFP -o Sample.sdf
|
|
1441
|
|
1442 To generate topological pharmacophore atom pairs fingerprints of arbitrary size corresponding
|
|
1443 to distances from 1 through 10 using default atom types with no weighting,
|
|
1444 normalization, and fuzzification of atom pairs count and create a SampleTPAPFP.csv
|
|
1445 file containing common data fields columns along with fingerprints vector strings
|
|
1446 data, type:
|
|
1447
|
|
1448 % TopologicalPharmacophoreAtomPairsFingerprints.pl --DataFieldsMode
|
|
1449 Common -r SampleTPAPFP -o Sample.sdf
|
|
1450
|
|
1451 To generate topological pharmacophore atom pairs fingerprints of arbitrary size corresponding
|
|
1452 to distances from 1 through 10 using default atom types with no weighting,
|
|
1453 normalization, and fuzzification of atom pairs count and create SampleTPAPFP.sdf, SampleTPAPFP.fpf,
|
|
1454 and SampleTPAPFP.csv files containing all data fields columns in CSV file along with fingerprints
|
|
1455 data, type:
|
|
1456
|
|
1457 % TopologicalPharmacophoreAtomPairsFingerprints.pl --DataFieldsMode
|
|
1458 All --output all -r SampleTPAPFP -o Sample.sdf
|
|
1459
|
|
1460
|
|
1461 =head1 AUTHOR
|
|
1462
|
|
1463 Manish Sud <msud@san.rr.com>
|
|
1464
|
|
1465 =head1 SEE ALSO
|
|
1466
|
|
1467 InfoFingerprintsFiles.pl, SimilarityMatricesFingerprints.pl, AtomNeighborhoodsFingerprints.pl,
|
|
1468 ExtendedConnectivityFingerprints.pl, MACCSKeysFingerprints.pl, PathLengthFingerprints.pl,
|
|
1469 TopologicalAtomPairsFingerprints.pl, TopologicalAtomTorsionsFingerprints.pl,
|
|
1470 TopologicalPharmacophoreAtomTripletsFingerprints.pl
|
|
1471
|
|
1472 =head1 COPYRIGHT
|
|
1473
|
|
1474 Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
1475
|
|
1476 This file is part of MayaChemTools.
|
|
1477
|
|
1478 MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
1479 the terms of the GNU Lesser General Public License as published by the Free
|
|
1480 Software Foundation; either version 3 of the License, or (at your option)
|
|
1481 any later version.
|
|
1482
|
|
1483 =cut
|