comparison bin/TopologicalAtomTorsionsFingerprints.pl @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 #!/usr/bin/perl -w
2 #
3 # $RCSfile: TopologicalAtomTorsionsFingerprints.pl,v $
4 # $Date: 2015/02/28 20:46:22 $
5 # $Revision: 1.32 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use FindBin; use lib "$FindBin::Bin/../lib";
31 use Getopt::Long;
32 use File::Basename;
33 use Text::ParseWords;
34 use Benchmark;
35 use FileUtil;
36 use TextUtil;
37 use SDFileUtil;
38 use MoleculeFileIO;
39 use FileIO::FingerprintsSDFileIO;
40 use FileIO::FingerprintsTextFileIO;
41 use FileIO::FingerprintsFPFileIO;
42 use AtomTypes::AtomicInvariantsAtomTypes;
43 use AtomTypes::FunctionalClassAtomTypes;
44 use Fingerprints::TopologicalAtomTorsionsFingerprints;
45
# State shared between the main script flow and the subroutines defined below...
my($ScriptName, %Options, %OptionsInfo, %SDFilesInfo, @SDFilesList);
my($StartTime, $EndTime, $TotalTime, $FileIndex);

# Don't buffer STDOUT so that progress messages show up right away...
$| = 1;

# Announce the script and start timing the run...
$ScriptName = basename($0);
print "\n$ScriptName: Starting...\n\n";
$StartTime = Benchmark->new;

# Retrieve command line options; show usage and quit when help is requested or
# no input file has been specified...
SetupScriptUsage();
die GetUsageFromPod("$FindBin::Bin/$ScriptName") if ($Options{help} || @ARGV < 1);

@SDFilesList = ExpandFileNames(\@ARGV, "sdf sd");

# Validate and process option values...
print "Processing options...\n";
ProcessOptions();

# Collect information about input files and setup output file names...
print "Checking input SD file(s)...\n";
RetrieveSDFilesInfo();

# Generate fingerprints for each input file which passed all the checks...
print "\nProcessing SD files...\n" if (@SDFilesList > 1);

FILE: for $FileIndex (0 .. $#SDFilesList) {
  next FILE unless $SDFilesInfo{FileOkay}[$FileIndex];

  print "\nProcessing file $SDFilesList[$FileIndex]...\n";
  GenerateTopologicalAtomTorsionsFingerprints($FileIndex);
}
print "\n$ScriptName:Done...\n\n";

# Report elapsed time...
$EndTime = Benchmark->new;
$TotalTime = timediff($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";
91
92 ###############################################################################
93
# Generate fingerprints for all compounds in the SD file at the specified index
# in @SDFilesList, write them to the requested output file(s) and print summary
# statistics for the file...
#
sub GenerateTopologicalAtomTorsionsFingerprints {
  my($FileIndex) = @_;
  my($InputFile, $Reader, $Molecule, $Fingerprints, $SDWriter, $TextWriter, $FPWriter);
  my($TotalCount, $SkippedCount) = (0, 0);

  $InputFile = $SDFilesList[$FileIndex];

  # Output files are opened before the input file is read...
  ($SDWriter, $TextWriter, $FPWriter) = SetupAndOpenOutputFiles($FileIndex);

  $Reader = MoleculeFileIO->new('Name' => $InputFile);
  $Reader->Open();

  COMPOUND: while ($Molecule = $Reader->ReadMolecule()) {
    $TotalCount++;

    # Skip compounds rejected by the optional filter; a warning has already
    # been issued for them during the check...
    if ($OptionsInfo{Filter} && CheckAndFilterCompound($TotalCount, $Molecule)) {
      $SkippedCount++;
      next COMPOUND;
    }

    $Fingerprints = GenerateMoleculeFingerprints($Molecule);
    unless ($Fingerprints) {
      $SkippedCount++;
      ProcessIgnoredCompound('FingerprintsGenerationFailed', $TotalCount, $Molecule);
      next COMPOUND;
    }

    WriteDataToOutputFiles($FileIndex, $TotalCount, $Molecule, $Fingerprints, $SDWriter, $TextWriter, $FPWriter);
  }
  $Reader->Close();

  # Close whichever output files were opened: SD, text and FP in that order...
  for my $Writer ($SDWriter, $TextWriter, $FPWriter) {
    $Writer->Close() if $Writer;
  }

  WriteFingerprintsGenerationSummaryStatistics($TotalCount, $SkippedCount);
}
146
# Issue a warning for a compound being ignored due to problems in fingerprints
# generation...
#
# Mode is matched case-insensitively against ContainsNonElementalData and
# ContainsNoElementalData; any other value, including
# FingerprintsGenerationFailed, is reported as a fingerprints generation failure.
#
sub ProcessIgnoredCompound {
  my($Mode, $CmpdCount, $Molecule) = @_;
  my($CmpdID, $DataFieldsRef, $Reason);

  $DataFieldsRef = $Molecule->GetDataFieldLabelAndValues();
  $CmpdID = SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldsRef);

  if ($Mode =~ /^ContainsNonElementalData$/i) {
    $Reason = "Compound contains atom data corresponding to non-elemental atom symbol(s)...";
  }
  elsif ($Mode =~ /^ContainsNoElementalData$/i) {
    $Reason = "Compound contains no atom data...";
  }
  else {
    $Reason = "Fingerprints generation didn't succeed...";
  }

  warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: $Reason\n\n";
}
174
# Check compound data and decide whether the compound is to be filtered out...
#
# Returns 1, after issuing a warning, for a compound which contains any
# non-elemental atom data or no elemental atom data at all; otherwise, returns 0.
#
sub CheckAndFilterCompound {
  my($CmpdCount, $Molecule) = @_;
  my($NumElements, $NumNonElements, $IgnoreMode);

  ($NumElements, $NumNonElements) = $Molecule->GetNumOfElementsAndNonElements();

  # Presence of non-elemental data takes precedence over missing elemental data...
  $IgnoreMode = '';
  if ($NumNonElements) {
    $IgnoreMode = 'ContainsNonElementalData';
  }
  elsif (!$NumElements) {
    $IgnoreMode = 'ContainsNoElementalData';
  }

  return 0 unless $IgnoreMode;

  ProcessIgnoredCompound($IgnoreMode, $CmpdCount, $Molecule);
  return 1;
}
195
# Print total, successfully processed and ignored compound counts for
# fingerprints generation; the processed count is derived as total minus
# ignored...
#
sub WriteFingerprintsGenerationSummaryStatistics {
  my($CmpdCount, $IgnoredCmpdCount) = @_;
  my($SuccessCount, @SummaryLines);

  $SuccessCount = $CmpdCount - $IgnoredCmpdCount;

  @SummaryLines = (
    "\nNumber of compounds: $CmpdCount\n",
    "Number of compounds processed successfully during fingerprints generation: $SuccessCount\n",
    "Number of compounds ignored during fingerprints generation: $IgnoredCmpdCount\n",
  );
  print join('', @SummaryLines);
}
208
# Create and open the fingerprints output file object(s) requested for the
# input file at the specified index...
#
# Returns a list containing SD, text and FP fingerprints file IO objects, in
# that order; the entry for an output type which wasn't requested is undef.
#
sub SetupAndOpenOutputFiles {
  my($FileIndex) = @_;
  my($SDWriter, $TextWriter, $FPWriter, $OutFile, %CommonParams);

  # Parameters shared by all fingerprints file IO objects...
  %CommonParams = (
    'Mode' => 'Write',
    'Overwrite' => $OptionsInfo{OverwriteFiles},
    'FingerprintsStringMode' => 'FingerprintsVectorString',
    'VectorStringFormat' => $OptionsInfo{VectorStringFormat},
  );

  if ($OptionsInfo{SDOutput}) {
    $OutFile = $SDFilesInfo{SDOutFileNames}[$FileIndex];
    print "Generating SD file $OutFile...\n";

    $SDWriter = FileIO::FingerprintsSDFileIO->new('Name' => $OutFile, %CommonParams, 'FingerprintsFieldLabel' => $OptionsInfo{FingerprintsLabel});
    $SDWriter->Open();
  }

  if ($OptionsInfo{FPOutput}) {
    $OutFile = $SDFilesInfo{FPOutFileNames}[$FileIndex];
    print "Generating FP file $OutFile...\n";

    $FPWriter = FileIO::FingerprintsFPFileIO->new('Name' => $OutFile, %CommonParams);
    $FPWriter->Open();
  }

  if ($OptionsInfo{TextOutput}) {
    my($ColLabelsRef);

    $OutFile = $SDFilesInfo{TextOutFileNames}[$FileIndex];
    $ColLabelsRef = SetupFPTextFileCoulmnLabels($FileIndex);
    print "Generating text file $OutFile...\n";

    $TextWriter = FileIO::FingerprintsTextFileIO->new('Name' => $OutFile, %CommonParams, 'DataColLabels' => $ColLabelsRef, 'OutDelim' => $OptionsInfo{OutDelim}, 'OutQuote' => $OptionsInfo{OutQuote});
    $TextWriter->Open();
  }

  return ($SDWriter, $TextWriter, $FPWriter);
}
248
# Write fingerprints and other compound data to each output file which has
# been opened; a false value for an output file object means that output type
# wasn't requested...
#
sub WriteDataToOutputFiles {
  my($FileIndex, $CmpdCount, $Molecule, $TopologicalAtomTorsionsFingerprints, $NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO) = @_;
  my($DataFieldsRef, $MoleculeString, $ColValuesRef, $CmpdID);

  # Data fields are needed only by text and FP output...
  $DataFieldsRef = ($NewFPTextFileIO || $NewFPFileIO) ? $Molecule->GetDataFieldLabelAndValues() : undef;

  # SD output: fingerprints go along with the input molecule string...
  if ($NewFPSDFileIO) {
    $MoleculeString = $Molecule->GetInputMoleculeString();
    $NewFPSDFileIO->WriteFingerprints($TopologicalAtomTorsionsFingerprints, $MoleculeString);
  }

  # Text output: fingerprints go along with the data column values...
  if ($NewFPTextFileIO) {
    $ColValuesRef = SetupFPTextFileCoulmnValues($FileIndex, $CmpdCount, $Molecule, $DataFieldsRef);
    $NewFPTextFileIO->WriteFingerprints($TopologicalAtomTorsionsFingerprints, $ColValuesRef);
  }

  # FP output: fingerprints go along with the compound ID...
  if ($NewFPFileIO) {
    $CmpdID = SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldsRef);
    $NewFPFileIO->WriteFingerprints($TopologicalAtomTorsionsFingerprints, $CmpdID);
  }
}
281
# Generate appropriate column labels for FPText output file...
#
# Data column labels are selected by the data fields mode and are always
# followed by the fingerprints label. Returns a reference to the list of labels.
#
sub SetupFPTextFileCoulmnLabels {
  my($FileIndex) = @_;
  my($Mode, @Labels);

  $Mode = $OptionsInfo{DataFieldsMode};

  @Labels = ($Mode =~ /^All$/i)        ? @{$SDFilesInfo{AllDataFieldsRef}[$FileIndex]}
          : ($Mode =~ /^Common$/i)     ? @{$SDFilesInfo{CommonDataFieldsRef}[$FileIndex]}
          : ($Mode =~ /^Specify$/i)    ? @{$OptionsInfo{SpecifiedDataFields}}
          : ($Mode =~ /^CompoundID$/i) ? ($OptionsInfo{CompoundIDLabel})
          :                              ();

  return [@Labels, $OptionsInfo{FingerprintsLabel}];
}
306
# Generate data column values for a compound in FPText output file...
#
# In CompoundID data fields mode, the only value is the generated compound ID.
# In All, Common and Specify modes, there is one value for each data field label
# with an empty string for any label missing from the compound data. Returns a
# reference to the list of values.
#
sub SetupFPTextFileCoulmnValues {
  my($FileIndex, $CmpdCount, $Molecule, $DataFieldLabelAndValuesRef) = @_;
  my($Mode, $FieldLabelsRef, $FieldLabel, @Values);

  $Mode = $OptionsInfo{DataFieldsMode};

  if ($Mode =~ /^CompoundID$/i) {
    return [SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef)];
  }

  # Pick the list of data field labels driving the column values...
  $FieldLabelsRef = ($Mode =~ /^All$/i)     ? $SDFilesInfo{AllDataFieldsRef}[$FileIndex]
                  : ($Mode =~ /^Common$/i)  ? $SDFilesInfo{CommonDataFieldsRef}[$FileIndex]
                  : ($Mode =~ /^Specify$/i) ? $OptionsInfo{SpecifiedDataFields}
                  :                           [];

  @Values = ();
  for $FieldLabel (@{$FieldLabelsRef}) {
    if (exists $DataFieldLabelAndValuesRef->{$FieldLabel}) {
      push @Values, $DataFieldLabelAndValuesRef->{$FieldLabel};
    }
    else {
      push @Values, '';
    }
  }

  return \@Values;
}
329
# Generate compound ID for FP and FPText output files using the compound ID mode...
#
#   MolNameOrLabelPrefix: molecule name, or prefix followed by compound number
#                         when the name is empty
#   LabelPrefix:          prefix followed by compound number
#   DataField:            value of the specified data field, or empty string
#                         when the compound doesn't have it
#   MolName:              molecule name
#
# An empty string is returned for any other mode.
#
sub SetupCmpdIDForOutputFiles {
  my($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef) = @_;
  my($Mode, $CmpdID, $FieldName);

  $Mode = $OptionsInfo{CompoundIDMode};
  $CmpdID = '';

  if ($Mode =~ /^MolNameOrLabelPrefix$/i) {
    $CmpdID = $Molecule->GetName() || "$OptionsInfo{CompoundID}${CmpdCount}";
  }
  elsif ($Mode =~ /^LabelPrefix$/i) {
    $CmpdID = "$OptionsInfo{CompoundID}${CmpdCount}";
  }
  elsif ($Mode =~ /^DataField$/i) {
    $FieldName = $OptionsInfo{CompoundID};
    if (exists $DataFieldLabelAndValuesRef->{$FieldName}) {
      $CmpdID = $DataFieldLabelAndValuesRef->{$FieldName};
    }
  }
  elsif ($Mode =~ /^MolName$/i) {
    $CmpdID = $Molecule->GetName();
  }

  return $CmpdID;
}
355
# Generate topological atom torsions fingerprints for a molecule...
#
# The molecule is optionally reduced to its largest component; rings and
# aromaticity are detected before fingerprints are generated. Returns the
# fingerprints object, or undef when ring detection or fingerprints generation
# doesn't succeed.
#
sub GenerateMoleculeFingerprints {
  my($Molecule) = @_;
  my($Fingerprints);

  $Molecule->KeepLargestComponent() if $OptionsInfo{KeepLargestComponent};

  return undef unless $Molecule->DetectRings();

  $Molecule->SetAromaticityModel($OptionsInfo{AromaticityModel});
  $Molecule->DetectAromaticity();

  $Fingerprints = Fingerprints::TopologicalAtomTorsionsFingerprints->new('Molecule' => $Molecule, 'AtomIdentifierType' => $OptionsInfo{AtomIdentifierType});
  SetAtomIdentifierTypeValuesToUse($Fingerprints);

  $Fingerprints->GenerateFingerprints();

  # Hand back fingerprints only after a successful generation...
  return $Fingerprints->IsFingerprintsGenerationSuccessful() ? $Fingerprints : undef;
}
384
# Pass atom identifier type specific values to the fingerprints object...
#
# Atomic invariants and functional classes are set for their respective atom
# identifier types; the other supported types don't need any values. The script
# dies for an unsupported atom identifier type.
#
sub SetAtomIdentifierTypeValuesToUse {
  my($TopologicalAtomTorsionsFingerprints) = @_;
  my($Type);

  $Type = $OptionsInfo{AtomIdentifierType};

  if ($Type !~ /^(AtomicInvariantsAtomTypes|FunctionalClassAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    die "Error: The value specified, $Options{atomidentifiertype}, for option \"-a, --AtomIdentifierType\" is not valid. Supported atom identifier types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes\n";
  }

  if ($Type =~ /^AtomicInvariantsAtomTypes$/i) {
    $TopologicalAtomTorsionsFingerprints->SetAtomicInvariantsToUse(\@{$OptionsInfo{AtomicInvariantsToUse}});
  }
  elsif ($Type =~ /^FunctionalClassAtomTypes$/i) {
    $TopologicalAtomTorsionsFingerprints->SetFunctionalClassesToUse(\@{$OptionsInfo{FunctionalClassesToUse}});
  }
}
403
# Retrieve information about SD files...
#
# For each file in @SDFilesList, validate the input file, work out the names of
# SD, FP and text output files and record the results in %SDFilesInfo. A file
# is flagged as okay only after it has passed all the checks; a file failing any
# check is skipped with a warning and keeps its FileOkay flag set to 0.
#
# The script dies when an input file can't be opened for reading.
#
sub RetrieveSDFilesInfo {
  my($SDFile, $Index, $FileDir, $FileExt, $FileName, $OutFileRoot, $TextOutFileExt, $SDOutFileExt, $FPOutFileExt, $NewSDFileName, $NewFPFileName, $NewTextFileName, $CheckDataField, $CollectDataFields, $AllDataFieldsRef, $CommonDataFieldsRef);

  %SDFilesInfo = ();
  @{$SDFilesInfo{FileOkay}} = ();
  @{$SDFilesInfo{OutFileRoot}} = ();
  @{$SDFilesInfo{SDOutFileNames}} = ();
  @{$SDFilesInfo{FPOutFileNames}} = ();
  @{$SDFilesInfo{TextOutFileNames}} = ();
  @{$SDFilesInfo{AllDataFieldsRef}} = ();
  @{$SDFilesInfo{CommonDataFieldsRef}} = ();

  # Input files need to be scanned only for text output: either to make sure the
  # data field used as compound ID exists or to collect data field labels...
  $CheckDataField = ($OptionsInfo{TextOutput} && ($OptionsInfo{DataFieldsMode} =~ /^CompoundID$/i) && ($OptionsInfo{CompoundIDMode} =~ /^DataField$/i)) ? 1 : 0;
  $CollectDataFields = ($OptionsInfo{TextOutput} && ($OptionsInfo{DataFieldsMode} =~ /^(All|Common)$/i)) ? 1 : 0;

  FILELIST: for $Index (0 .. $#SDFilesList) {
    $SDFile = $SDFilesList[$Index];

    $SDFilesInfo{FileOkay}[$Index] = 0;
    $SDFilesInfo{OutFileRoot}[$Index] = '';
    $SDFilesInfo{SDOutFileNames}[$Index] = '';
    $SDFilesInfo{FPOutFileNames}[$Index] = '';
    $SDFilesInfo{TextOutFileNames}[$Index] = '';

    if (!(-e $SDFile)) {
      warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
      next FILELIST;
    }
    if (!CheckFileType($SDFile, "sd sdf")) {
      warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
      next FILELIST;
    }

    if ($CheckDataField) {
      # Make sure data field exists in SD file; only the first compound is checked...
      my($SDFileHandle, $CmpdString, $SpecifiedDataField, @CmpdLines, %DataFieldValues);

      # Three-argument open keeps characters in the file name from being
      # interpreted as an open mode...
      open $SDFileHandle, '<', $SDFile or die "Error: Couldn't open $SDFile: $! \n";
      $CmpdString = ReadCmpdString($SDFileHandle);
      close $SDFileHandle;

      @CmpdLines = split "\n", $CmpdString;
      %DataFieldValues = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
      $SpecifiedDataField = $OptionsInfo{CompoundID};
      if (!exists $DataFieldValues{$SpecifiedDataField}) {
        warn "Warning: Ignoring file $SDFile: Data field value, $SpecifiedDataField, using \"--CompoundID\" option in \"DataField\" \"--CompoundIDMode\" doesn't exist\n";
        next FILELIST;
      }
    }

    $AllDataFieldsRef = '';
    $CommonDataFieldsRef = '';
    if ($CollectDataFields) {
      my($SDFileHandle, $CmpdCount);

      open $SDFileHandle, '<', $SDFile or die "Error: Couldn't open $SDFile: $! \n";
      ($CmpdCount, $AllDataFieldsRef, $CommonDataFieldsRef) = GetAllAndCommonCmpdDataHeaderLabels($SDFileHandle);
      close $SDFileHandle;
    }

    # Setup output file names...
    $FileDir = ""; $FileName = ""; $FileExt = "";
    ($FileDir, $FileName, $FileExt) = ParseFileName($SDFile);

    $TextOutFileExt = ($OptionsInfo{OutDelim} =~ /^tab$/i) ? "tsv" : "csv";
    $SDOutFileExt = $FileExt;
    $FPOutFileExt = "fpf";

    # A root name specified by the user is honoured only for a single input file...
    if ($OptionsInfo{OutFileRoot} && (@SDFilesList == 1)) {
      my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
      if ($RootFileName && $RootFileExt) {
        $FileName = $RootFileName;
      }
      else {
        $FileName = $OptionsInfo{OutFileRoot};
      }
      $OutFileRoot = $FileName;
    }
    else {
      $OutFileRoot = "${FileName}TopologicalAtomTorsionsFP";
    }

    $NewSDFileName = "${OutFileRoot}.${SDOutFileExt}";
    $NewFPFileName = "${OutFileRoot}.${FPOutFileExt}";
    $NewTextFileName = "${OutFileRoot}.${TextOutFileExt}";

    if ($OptionsInfo{SDOutput}) {
      # Compare names literally: the output name is quoted so that its characters
      # aren't treated as a regular expression and is required to match the whole
      # input path or everything after a directory separator. An input file whose
      # name merely contains the output name is no longer rejected...
      if ($SDFile =~ m{(?:\A|[/\\])\Q$NewSDFileName\E\z}i) {
        warn "Warning: Ignoring input file $SDFile: Same output, $NewSDFileName, and input file names.\n";
        print "Specify a different name using \"-r --root\" option or use default name.\n";
        next FILELIST;
      }
    }

    if (!$OptionsInfo{OverwriteFiles}) {
      # Check SD, FP and text output files...
      if ($OptionsInfo{SDOutput}) {
        if (-e $NewSDFileName) {
          warn "Warning: Ignoring file $SDFile: The file $NewSDFileName already exists\n";
          next FILELIST;
        }
      }
      if ($OptionsInfo{FPOutput}) {
        if (-e $NewFPFileName) {
          warn "Warning: Ignoring file $SDFile: The file $NewFPFileName already exists\n";
          next FILELIST;
        }
      }
      if ($OptionsInfo{TextOutput}) {
        if (-e $NewTextFileName) {
          warn "Warning: Ignoring file $SDFile: The file $NewTextFileName already exists\n";
          next FILELIST;
        }
      }
    }

    $SDFilesInfo{FileOkay}[$Index] = 1;

    $SDFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
    $SDFilesInfo{SDOutFileNames}[$Index] = $NewSDFileName;
    $SDFilesInfo{FPOutFileNames}[$Index] = $NewFPFileName;
    $SDFilesInfo{TextOutFileNames}[$Index] = $NewTextFileName;

    $SDFilesInfo{AllDataFieldsRef}[$Index] = $AllDataFieldsRef;
    $SDFilesInfo{CommonDataFieldsRef}[$Index] = $CommonDataFieldsRef;
  }
}
536
# Process option values...
#
# Copy validated command line option values from %Options into %OptionsInfo,
# converting Yes/No and output type values into boolean flags along the way.
#
# The script dies when a value required by the selected mode is missing:
# "--CompoundID" for DataField compound ID mode in CompoundID data fields mode,
# or "--DataFields" in Specify data fields mode.
#
sub ProcessOptions {
  %OptionsInfo = ();

  ProcessAtomIdentifierTypeOptions();

  $OptionsInfo{AromaticityModel} = $Options{aromaticitymodel};

  $OptionsInfo{CompoundIDMode} = $Options{compoundidmode};
  $OptionsInfo{CompoundIDLabel} = $Options{compoundidlabel};
  $OptionsInfo{DataFieldsMode} = $Options{datafieldsmode};

  @{$OptionsInfo{SpecifiedDataFields}} = ();
  $OptionsInfo{CompoundID} = '';

  # Compound IDs are generated for FP output and for warnings about ignored
  # compounds in every data fields mode, so the compound ID value is setup
  # irrespective of the data fields mode...
  if ($Options{compoundidmode} =~ /^DataField$/i) {
    if ($Options{compoundid}) {
      $OptionsInfo{CompoundID} = $Options{compoundid};
    }
    elsif ($Options{datafieldsmode} =~ /^CompoundID$/i) {
      # A missing data field name is fatal only where it has always been...
      die "Error: You must specify a value for \"--CompoundID\" option in \"DataField\" \"--CompoundIDMode\". \n";
    }
  }
  elsif ($Options{compoundidmode} =~ /^(LabelPrefix|MolNameOrLabelPrefix)$/i) {
    $OptionsInfo{CompoundID} = $Options{compoundid} ? $Options{compoundid} : 'Cmpd';
  }

  if ($Options{datafieldsmode} =~ /^Specify$/i) {
    if (!$Options{datafields}) {
      die "Error: You must specify a value for \"--DataFields\" option in \"Specify\" \"-d, --DataFieldsMode\". \n";
    }
    push @{$OptionsInfo{SpecifiedDataFields}}, split(/\,/, $Options{datafields});
  }

  $OptionsInfo{Filter} = ($Options{filter} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{FingerprintsLabel} = $Options{fingerprintslabel} ? $Options{fingerprintslabel} : 'TopologicalAtomTorsionsFingerprints';

  $OptionsInfo{KeepLargestComponent} = ($Options{keeplargestcomponent} =~ /^Yes$/i) ? 1 : 0;

  # Output type "All" turns on all three output flags...
  $OptionsInfo{Output} = $Options{output};
  $OptionsInfo{SDOutput} = ($Options{output} =~ /^(SD|All)$/i) ? 1 : 0;
  $OptionsInfo{FPOutput} = ($Options{output} =~ /^(FP|All)$/i) ? 1 : 0;
  $OptionsInfo{TextOutput} = ($Options{output} =~ /^(Text|All)$/i) ? 1 : 0;

  $OptionsInfo{OutDelim} = $Options{outdelim};
  $OptionsInfo{OutQuote} = ($Options{quote} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
  $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;

  $OptionsInfo{VectorStringFormat} = $Options{vectorstringformat};
}
593
# Record atom identifier type and process the options which go along with it...
#
# Atomic invariants and functional classes options are processed for their
# respective atom identifier types; the other supported types don't have any
# related options. The script dies for an unsupported atom identifier type.
#
sub ProcessAtomIdentifierTypeOptions {
  my($Type);

  $Type = $Options{atomidentifiertype};
  $OptionsInfo{AtomIdentifierType} = $Type;

  if ($Type !~ /^(AtomicInvariantsAtomTypes|FunctionalClassAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    die "Error: The value specified, $Options{atomidentifiertype}, for option \"-a, --AtomIdentifierType\" is not valid. Supported atom identifier types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes\n";
  }

  if ($Type =~ /^AtomicInvariantsAtomTypes$/i) {
    ProcessAtomicInvariantsToUseOption();
  }
  elsif ($Type =~ /^FunctionalClassAtomTypes$/i) {
    ProcessFunctionalClassesToUse();
  }
}
613
# Process specified atomic invariants to use...
#
# The comma delimited option value is split into individual atomic invariants
# which are stored in %OptionsInfo. The script dies for an empty value, for an
# unavailable atomic invariant, or when neither AS nor AtomSymbol is specified.
#
sub ProcessAtomicInvariantsToUseOption {
  my($Invariant, @Invariants);

  @{$OptionsInfo{AtomicInvariantsToUse}} = ();

  if (IsEmpty($Options{atomicinvariantstouse})) {
    die "Error: Atomic invariants value specified using \"--AtomicInvariantsToUse\" option is empty\n";
  }

  @Invariants = split /\,/, $Options{atomicinvariantstouse};
  for $Invariant (@Invariants) {
    unless (AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($Invariant)) {
      die "Error: Atomic invariant specified, $Invariant, using \"--AtomicInvariantsToUse\" option is not valid...\n ";
    }
    push @{$OptionsInfo{AtomicInvariantsToUse}}, $Invariant;
  }

  # Atom symbol must be one of the specified atomic invariants...
  unless (grep { /^(AS|AtomSymbol)$/i } @Invariants) {
    die "Error: Atomic invariant, AS or AtomSymbol, must be specified as using \"--AtomicInvariantsToUse\" option...\n ";
  }
}
638
# Process specified functional classes to use...
#
# The comma delimited option value is split into individual functional classes
# which are stored in %OptionsInfo. The script dies for an empty value or for an
# unavailable functional class.
#
sub ProcessFunctionalClassesToUse {
  my($ClassName, @ClassNames);

  @{$OptionsInfo{FunctionalClassesToUse}} = ();

  if (IsEmpty($Options{functionalclassestouse})) {
    die "Error: Functional classes value specified using \"--FunctionalClassesToUse\" option is empty\n";
  }

  @ClassNames = split /\,/, $Options{functionalclassestouse};
  for $ClassName (@ClassNames) {
    unless (AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($ClassName)) {
      die "Error: Functional class specified, $ClassName, using \"--FunctionalClassesToUse\" option is not valid...\n ";
    }
    push @{$OptionsInfo{FunctionalClassesToUse}}, $ClassName;
  }
}
656
# Setup script usage and retrieve command line arguments specified using various options...
#
# %Options is initialized with default values, updated from the command line
# and validated. The current directory is changed when a working directory has
# been specified. The script dies, with an error message naming the offending
# option, for an unknown option or for a value which isn't allowed.
#
sub SetupScriptUsage {

  # Default values for the options...
  %Options = (
    aromaticitymodel => 'MayaChemToolsAromaticityModel',

    atomidentifiertype => 'AtomicInvariantsAtomTypes',
    atomicinvariantstouse => 'AS,X,BO,H,FC',

    functionalclassestouse => 'HBD,HBA,PI,NI,Ar,Hal',

    compoundidmode => 'LabelPrefix',
    compoundidlabel => 'CompoundID',
    datafieldsmode => 'CompoundID',

    filter => 'Yes',

    keeplargestcomponent => 'Yes',

    output => 'text',
    outdelim => 'comma',
    quote => 'yes',

    vectorstringformat => 'IDsAndValuesString',
  );

  # Retrieve all the options...
  if (!GetOptions(\%Options, "aromaticitymodel=s", "atomidentifiertype|a=s", "atomicinvariantstouse=s", "functionalclassestouse=s", "compoundid=s", "compoundidlabel=s", "compoundidmode=s", "datafields=s", "datafieldsmode|d=s", "filter|f=s", "fingerprintslabel=s", "help|h", "keeplargestcomponent|k=s", "outdelim=s", "output=s", "overwrite|o", "quote|q=s", "root|r=s", "vectorstringformat|v=s", "workingdir|w=s")) {
    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
  }

  # Switch to the working directory before any file names are used...
  if ($Options{workingdir}) {
    if (! -d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }

  if (!Molecule::IsSupportedAromaticityModel($Options{aromaticitymodel})) {
    my(@SupportedModels) = Molecule::GetSupportedAromaticityModels();
    die "Error: The value specified, $Options{aromaticitymodel}, for option \"--AromaticityModel\" is not valid. Supported aromaticity models in current release of MayaChemTools: @SupportedModels\n";
  }
  if ($Options{atomidentifiertype} !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    die "Error: The value specified, $Options{atomidentifiertype}, for option \"-a, --AtomIdentifierType\" is not valid. Supported atom identifier types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes\n";
  }
  if ($Options{compoundidmode} !~ /^(DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i) {
    die "Error: The value specified, $Options{compoundidmode}, for option \"--CompoundIDMode\" is not valid. Allowed values: DataField, MolName, LabelPrefix or MolNameOrLabelPrefix\n";
  }
  if ($Options{datafieldsmode} !~ /^(All|Common|Specify|CompoundID)$/i) {
    die "Error: The value specified, $Options{datafieldsmode}, for option \"-d, --DataFieldsMode\" is not valid. Allowed values: All, Common, Specify or CompoundID\n";
  }
  if ($Options{filter} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{filter}, for option \"-f, --Filter\" is not valid. Allowed values: Yes or No\n";
  }
  if ($Options{keeplargestcomponent} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{keeplargestcomponent}, for option \"-k, --KeepLargestComponent\" is not valid. Allowed values: Yes or No\n";
  }
  if ($Options{output} !~ /^(SD|FP|text|all)$/i) {
    die "Error: The value specified, $Options{output}, for option \"--output\" is not valid. Allowed values: SD, FP, text, or all\n";
  }
  # Reject an unsupported delimiter here, along with the other enumerated
  # options, instead of letting it through to text file generation...
  if ($Options{outdelim} !~ /^(comma|semicolon|tab)$/i) {
    die "Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
  }
  if ($Options{quote} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: Yes or No\n";
  }
  if ($Options{outdelim} =~ /^semicolon$/i && $Options{quote} =~ /^No$/i) {
    die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not allowed with, semicolon value of \"--outdelim\" option: Fingerprints string use semicolon as delimiter for various data fields and must be quoted.\n";
  }
  if ($Options{vectorstringformat} !~ /^(IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString)$/i) {
    die "Error: The value specified, $Options{vectorstringformat}, for option \"-v, --VectorStringFormat\" is not valid. Allowed values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString or ValuesAndIDsPairsString\n";
  }
}
725
726 __END__
727
728 =head1 NAME
729
730 TopologicalAtomTorsionsFingerprints.pl - Generate topological atom torsions fingerprints for SD files
731
732 =head1 SYNOPSIS
733
734 TopologicalAtomTorsionsFingerprints.pl SDFile(s)...
735
736 TopologicalAtomTorsionsFingerprints.pl [B<--AromaticityModel> I<AromaticityModelType>]
737 [B<-a, --AtomIdentifierType> I<AtomicInvariantsAtomTypes>]
738 [B<--AtomicInvariantsToUse> I<"AtomicInvariant,AtomicInvariant...">]
739 [B<--FunctionalClassesToUse> I<"FunctionalClass1,FunctionalClass2...">]
740 [B<--CompoundID> I<DataFieldName or LabelPrefixString>] [B<--CompoundIDLabel> I<text>]
741 [B<--CompoundIDMode>] [B<--DataFields> I<"FieldLabel1,FieldLabel2,...">]
742 [B<-d, --DataFieldsMode> I<All | Common | Specify | CompoundID>] [B<-f, --Filter> I<Yes | No>]
743 [B<--FingerprintsLabel> I<text>] [B<-h, --help>] [B<-k, --KeepLargestComponent> I<Yes | No>]
744 [B<--OutDelim> I<comma | tab | semicolon>] [B<--output> I<SD | FP | text | all>] [B<-o, --overwrite>]
745 [B<-q, --quote> I<Yes | No>] [B<-r, --root> I<RootName>] [B<-v, --VectorStringFormat>]
746 [B<-w, --WorkingDir> dirname] SDFile(s)...
747
748 =head1 DESCRIPTION
749
750 Generate topological atom torsions fingerprints [ Ref 58, Ref 72 ] for I<SDFile(s)> and create
751 appropriate SD, FP or CSV/TSV text file(s) containing fingerprints vector strings corresponding to
752 molecular fingerprints.
753
754 Multiple SDFile names are separated by spaces. The valid file extensions are I<.sdf>
755 and I<.sd>. All other file names are ignored. All the SD files in a current directory
756 can be specified either by I<*.sdf> or the current directory name.
757
758 The current release of MayaChemTools supports generation of topological atom torsions
759 fingerprints corresponding to following B<-a, --AtomIdentifierTypes>:
760
761 AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
762 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
763 SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
764
765 Based on the values specified for B<-a, --AtomIdentifierType> and B<--AtomicInvariantsToUse>,
766 initial atom types are assigned to all non-hydrogen atoms in a molecule. All unique atom torsions
767 are identified and an atom torsion identifier is generated; the format of atom torsion identifier is:
768
769 <AtomType1>-<AtomType2>-<AtomType3>-<AtomType4>
770
771 AtomType1, AtomType2, AtomType3, AtomType4: Assigned atom types
772
773 where AtomType1 <= AtomType2 <= AtomType3 <= AtomType4
774
775 The atom torsion identifiers for all unique atom torsions corresponding to non-hydrogen atoms constitute
776 topological atom torsions fingerprints of the molecule.
777
778 Example of I<SD> file containing topological atom torsions fingerprints string data:
779
780 ... ...
781 ... ...
782 $$$$
783 ... ...
784 ... ...
785 ... ...
786 41 44 0 0 0 0 0 0 0 0999 V2000
787 -3.3652 1.4499 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
788 ... ...
789 2 3 1 0 0 0 0
790 ... ...
791 M END
792 > <CmpdID>
793 Cmpd1
794
795 > <TopologicalAtomTorsionsFingerprints>
796 FingerprintsVector;TopologicalAtomTorsions:AtomicInvariantsAtomTypes;33
797 ;NumericalValues;IDsAndValuesString;C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-C.
798 X3.BO4 C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-N.X3.BO3 C.X2.BO2.H2-C.X2.BO2.H
799 2-C.X3.BO3.H1-C.X2.BO2.H2 C.X2.BO2.H2-C.X2.BO2.H2-C.X3.BO3.H1-O.X1....;
800 2 2 1 1 2 2 1 1 3 4 4 8 4 2 2 6 2 2 1 2 1 1 2 1 1 2 6 2 4 2 1 3 1
801
802 $$$$
803 ... ...
804 ... ...
805
806 Example of I<FP> file containing topological atom torsions fingerprints string data:
807
808 #
809 # Package = MayaChemTools 7.4
810 # Release Date = Oct 21, 2010
811 #
812 # TimeStamp = Fri Mar 11 15:17:20 2011
813 #
814 # FingerprintsStringType = FingerprintsVector
815 #
816 # Description = TopologicalAtomTorsions:AtomicInvariantsAtomTypes
817 # VectorStringFormat = IDsAndValuesString
818 # VectorValuesType = NumericalValues
819 #
820 Cmpd1 33;C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-C.X3.BO4...;2 2 1 1 2 2 ...
821 Cmpd2 23;C.X1.BO1.H3-C.X2.BO2.H2-C.X3.BO3.H1-C.X2.BO2.H2...;2 2 1 5 ...
822 ... ...
823 ... ...
824
825 Example of CSV I<Text> file containing topological atom torsions fingerprints string data:
826
827 "CompoundID","TopologicalAtomTorsionsFingerprints"
828 "Cmpd1","FingerprintsVector;TopologicalAtomTorsions:AtomicInvariantsAto
829 mTypes;33;NumericalValues;IDsAndValuesString;C.X1.BO1.H3-C.X3.BO3.H1-C.
830 X3.BO4-C.X3.BO4 C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-N.X3.BO3 C.X2.BO2.H2-C
831 .X2.BO2.H2-C.X3.BO3.H1-C.X2.BO2.H2 C.X2.BO2.H2-C.X2.BO2.H2-C.X3.BO3....;
832 2 2 1 1 2 2 1 1 3 4 4 8 4 2 2 6 2 2 1 2 1 1 2 1 1 2 6 2 4 2 1 3 1
833 ... ...
834 ... ...
835
836 The current release of MayaChemTools generates the following types of topological atom torsions
837 fingerprints vector strings:
838
839 FingerprintsVector;TopologicalAtomTorsions:AtomicInvariantsAtomTypes;3
840 3;NumericalValues;IDsAndValuesString;C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-
841 C.X3.BO4 C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-N.X3.BO3 C.X2.BO2.H2-C.X2.BO
842 2.H2-C.X3.BO3.H1-C.X2.BO2.H2 C.X2.BO2.H2-C.X2.BO2.H2-C.X3.BO3.H1-O...;
843 2 2 1 1 2 2 1 1 3 4 4 8 4 2 2 6 2 2 1 2 1 1 2 1 1 2 6 2 4 2 1 3 1
844
845 FingerprintsVector;TopologicalAtomTorsions:AtomicInvariantsAtomTypes;3
846 3;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3-C.X3.BO3.H1-C.X3
847 .BO4-C.X3.BO4 2 C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-N.X3.BO3 2 C.X2.BO2.H
848 2-C.X2.BO2.H2-C.X3.BO3.H1-C.X2.BO2.H2 1 C.X2.BO2.H2-C.X2.BO2.H2-C.X3.B
849 O3.H1-O.X1.BO1.H1 1 C.X2.BO2.H2-C.X2.BO2.H2-N.X3.BO3-C.X3.BO4 2 C.X2.B
850 O2.H2-C.X3.BO3.H1-C.X2.BO2.H2-C.X3.BO3.H1 2 C.X2.BO2.H2-C.X3.BO3.H1...
851
852 FingerprintsVector;TopologicalAtomTorsions:DREIDINGAtomTypes;27;Numeri
853 calValues;IDsAndValuesString;C_2-C_3-C_3-C_3 C_2-C_3-C_3-O_3 C_2-C_R-C
854 _R-C_3 C_2-C_R-C_R-C_R C_2-C_R-C_R-N_R C_2-N_3-C_R-C_R C_3-C_3-C_2-O_2
855 C_3-C_3-C_2-O_3 C_3-C_3-C_3-C_3 C_3-C_3-C_3-N_R C_3-C_3-C_3-O_3 C_...;
856 1 1 1 2 1 2 1 1 3 1 3 2 2 2 1 1 1 3 1 2 2 32 2 2 5 3 1
857
858 FingerprintsVector;TopologicalAtomTorsions:EStateAtomTypes;36;Numerica
859 lValues;IDsAndValuesString;aaCH-aaCH-aaCH-aaCH aaCH-aaCH-aaCH-aasC aaC
860 H-aaCH-aasC-aaCH aaCH-aaCH-aasC-aasC aaCH-aaCH-aasC-sF aaCH-aaCH-aasC-
861 ssNH aaCH-aasC-aasC-aasC aaCH-aasC-aasC-aasN aaCH-aasC-ssNH-dssC a...;
862 4 4 8 4 2 2 6 2 2 2 4 3 2 1 3 3 2 2 2 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 2
863
864 FingerprintsVector;TopologicalAtomTorsions:FunctionalClassAtomTypes;26
865 ;NumericalValues;IDsAndValuesString;Ar-Ar-Ar-Ar Ar-Ar-Ar-Ar.HBA Ar-Ar-
866 Ar-HBD Ar-Ar-Ar-Hal Ar-Ar-Ar-None Ar-Ar-Ar.HBA-Ar Ar-Ar-Ar.HBA-None Ar
867 -Ar-HBD-None Ar-Ar-None-HBA Ar-Ar-None-HBD Ar-Ar-None-None Ar-Ar.H...;
868 32 5 2 2 3 3 3 2 2 2 2 1 2 1 1 1 2 1 1 1 1 3 1 1 1 3
869
870 FingerprintsVector;TopologicalAtomTorsions:MMFF94AtomTypes;43;Numerica
871 lValues;IDsAndValuesString;C5A-C5B-C5B-C5A C5A-C5B-C5B-C=ON C5A-C5B-C5
872 B-CB C5A-C5B-C=ON-NC=O C5A-C5B-C=ON-O=CN C5A-C5B-CB-CB C5A-CB-CB-CB C5
873 A-N5-C5A-C5B C5A-N5-C5A-CB C5A-N5-C5A-CR C5A-N5-CR-CR C5B-C5A-CB-C...;
874 1 1 1 1 1 2 2 2 1 1 2 2 2 2 1 1 2 1 1 2 1 2 1 1 1 2 1 1 1 2 18 2 2 1 1
875 1 1 2 1 1 3 1 3
876
877 FingerprintsVector;TopologicalAtomTorsions:SLogPAtomTypes;49;Numerical
878 Values;IDsAndValuesPairsString;C1-C10-N11-C20 1 C1-C10-N11-C21 1 C1-C1
879 1-C21-C21 2 C1-C11-C21-N11 2 C1-CS-C1-C10 1 C1-CS-C1-C5 1 C1-CS-C1-CS
880 2 C10-C1-CS-O2 1 C10-N11-C20-C20 2 C10-N11-C21-C11 1 C10-N11-C21-C21 1
881 C11-C21-C21-C20 1 C11-C21-C21-C5 1 C11-C21-N11-C20 1 C14-C18-C18-C20
882 2 C18-C14-C18-C18 2 C18-C18-C14-F 2 C18-C18-C18-C18 4 C18-C18-C18-C...
883
884 FingerprintsVector;TopologicalAtomTorsions:SYBYLAtomTypes;26;Numerical
885 Values;IDsAndValuesPairsString;C.2-C.3-C.3-C.3 1 C.2-C.3-C.3-O.3 1 C.2
886 -C.ar-C.ar-C.3 1 C.2-C.ar-C.ar-C.ar 2 C.2-C.ar-C.ar-N.ar 1 C.2-N.am-C.
887 ar-C.ar 2 C.3-C.3-C.2-O.co2 2 C.3-C.3-C.3-C.3 3 C.3-C.3-C.3-N.ar 1 C.3
888 -C.3-C.3-O.3 3 C.3-C.3-C.ar-C.ar 2 C.3-C.3-C.ar-N.ar 2 C.3-C.3-N.ar-C.
889 ar 2 C.3-C.ar-C.ar-C.ar 1 C.3-C.ar-N.ar-C.3 1 C.3-C.ar-N.ar-C.ar 1 ...
890
891 FingerprintsVector;TopologicalAtomTorsions:TPSAAtomTypes;8;NumericalVa
892 lues;IDsAndValuesPairsString;N21-None-None-None 9 N7-None-None-None 4
893 None-N21-None-None 10 None-N7-None-None 3 None-N7-None-O3 1 None-None-
894 None-None 44 None-None-None-O3 3 None-None-None-O4 5
895
896 FingerprintsVector;TopologicalAtomTorsions:UFFAtomTypes;27;NumericalVa
897 lues;IDsAndValuesPairsString;C_2-C_3-C_3-C_3 1 C_2-C_3-C_3-O_3 1 C_2-C
898 _R-C_R-C_3 1 C_2-C_R-C_R-C_R 2 C_2-C_R-C_R-N_R 1 C_2-N_3-C_R-C_R 2 C_3
899 -C_3-C_2-O_2 1 C_3-C_3-C_2-O_3 1 C_3-C_3-C_3-C_3 3 C_3-C_3-C_3-N_R 1 C
900 _3-C_3-C_3-O_3 3 C_3-C_3-C_R-C_R 2 C_3-C_3-C_R-N_R 2 C_3-C_3-N_R-C_R 2
901 C_3-C_R-C_R-C_R 1 C_3-C_R-N_R-C_3 1 C_3-C_R-N_R-C_R 1 C_3-N_R-C_R-...
902
903 =head1 OPTIONS
904
905 =over 4
906
907 =item B<--AromaticityModel> I<MDLAromaticityModel | TriposAromaticityModel | MMFFAromaticityModel | ChemAxonBasicAromaticityModel | ChemAxonGeneralAromaticityModel | DaylightAromaticityModel | MayaChemToolsAromaticityModel>
908
909 Specify aromaticity model to use during detection of aromaticity. Possible values in the current
910 release are: I<MDLAromaticityModel, TriposAromaticityModel, MMFFAromaticityModel,
911 ChemAxonBasicAromaticityModel, ChemAxonGeneralAromaticityModel, DaylightAromaticityModel
912 or MayaChemToolsAromaticityModel>. Default value: I<MayaChemToolsAromaticityModel>.
913
914 The supported aromaticity model names along with model specific control parameters
915 are defined in B<AromaticityModelsData.csv>, which is distributed with the current release
916 and is available under B<lib/data> directory. B<Molecule.pm> module retrieves data from
917 this file during class instantiation and makes it available to method B<DetectAromaticity>
918 for detecting aromaticity corresponding to a specific model.
919
920 =item B<-a, --AtomIdentifierType> I<AtomicInvariantsAtomTypes | DREIDINGAtomTypes | EStateAtomTypes | FunctionalClassAtomTypes | MMFF94AtomTypes | SLogPAtomTypes | SYBYLAtomTypes | TPSAAtomTypes | UFFAtomTypes>
921
922 Specify atom identifier type to use for assignment of initial atom identifier to non-hydrogen
923 atoms during calculation of topological atom torsions fingerprints. Possible values in the current
924 release are: I<AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
925 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
926 TPSAAtomTypes, UFFAtomTypes>. Default value: I<AtomicInvariantsAtomTypes>.
927
928 =item B<--AtomicInvariantsToUse> I<"AtomicInvariant,AtomicInvariant...">
929
930 This value is used during I<AtomicInvariantsAtomTypes> value of B<-a, --AtomIdentifierType>
931 option. It's a list of comma separated valid atomic invariant atom types.
932
933 Possible values for atomic invariants are: I<AS, X, BO, LBO, SB, DB, TB,
934 H, Ar, RA, FC, MN, SM>. Default value: I<AS,X,BO,H,FC>.
935
936 The atomic invariants abbreviations correspond to:
937
938 AS = Atom symbol corresponding to element symbol
939
940 X<n> = Number of non-hydrogen atom neighbors or heavy atoms
941 BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms
942 LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms
943 SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms
944 DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms
945 TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms
946 H<n> = Number of implicit and explicit hydrogens for atom
947 Ar = Aromatic annotation indicating whether atom is aromatic
948 RA = Ring atom annotation indicating whether atom is in a ring
949 FC<+n/-n> = Formal charge assigned to atom
950 MN<n> = Mass number indicating isotope other than most abundant isotope
951 SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or
952 3 (triplet)
953
954 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
955
956 AS.X<n>.BO<n>.LBO<n>.SB<n>.DB<n>.TB<n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
957
958 Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
959 optional. Atom type specification doesn't include atomic invariants with zero or undefined values.
960
961 In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words
962 are also allowed:
963
964 X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors
965 BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms
966 LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms
967 SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms
968 DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms
969 TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms
970 H : NumOfImplicitAndExplicitHydrogens
971 Ar : Aromatic
972 RA : RingAtom
973 FC : FormalCharge
974 MN : MassNumber
975 SM : SpinMultiplicity
976
977 I<AtomTypes::AtomicInvariantsAtomTypes> module is used to assign atomic invariant
978 atom types.
979
980 =item B<--FunctionalClassesToUse> I<"FunctionalClass1,FunctionalClass2...">
981
982 This value is used during I<FunctionalClassAtomTypes> value of B<-a, --AtomIdentifierType>
983 option. It's a list of comma separated valid functional classes.
984
985 Possible values for atom functional classes are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>.
986 Default value [ Ref 24 ]: I<HBD,HBA,PI,NI,Ar,Hal>.
987
988 The functional class abbreviations correspond to:
989
990 HBD: HydrogenBondDonor
991 HBA: HydrogenBondAcceptor
992 PI : PositivelyIonizable
993 NI : NegativelyIonizable
994 Ar : Aromatic
995 Hal : Halogen
996 H : Hydrophobic
997 RA : RingAtom
998 CA : ChainAtom
999
1000 Functional class atom type specification for an atom corresponds to:
1001
1002 Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
1003
1004 I<AtomTypes::FunctionalClassAtomTypes> module is used to assign functional class atom
1005 types. It uses the following definitions [ Ref 60-61, Ref 65-66 ]:
1006
1007 HydrogenBondDonor: NH, NH2, OH
1008 HydrogenBondAcceptor: N[!H], O
1009 PositivelyIonizable: +, NH2
1010 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
1011
1012 =item B<--CompoundID> I<DataFieldName or LabelPrefixString>
1013
1014 This value is B<--CompoundIDMode> specific and indicates how compound ID is generated.
1015
1016 For I<DataField> value of B<--CompoundIDMode> option, it corresponds to datafield label name
1017 whose value is used as compound ID; otherwise, it's a prefix string used for generating compound
1018 IDs like LabelPrefixString<Number>. Default value, I<Cmpd>, generates compound IDs which
1019 look like Cmpd<Number>.
1020
1021 Examples for I<DataField> value of B<--CompoundIDMode>:
1022
1023 MolID
1024 ExtReg
1025
1026 Examples for I<LabelPrefix> or I<MolNameOrLabelPrefix> value of B<--CompoundIDMode>:
1027
1028 Compound
1029
1030 The value specified above generates compound IDs which correspond to Compound<Number>
1031 instead of default value of Cmpd<Number>.
1032
1033 =item B<--CompoundIDLabel> I<text>
1034
1035 Specify compound ID column label for CSV/TSV text file(s) used during I<CompoundID> value
1036 of B<--DataFieldsMode> option. Default value: I<CompoundID>.
1037
1038 =item B<--CompoundIDMode> I<DataField | MolName | LabelPrefix | MolNameOrLabelPrefix>
1039
1040 Specify how to generate compound IDs and write to FP or CSV/TSV text file(s) along with generated
1041 fingerprints for I<FP | text | all> values of B<--output> option: use a I<SDFile(s)> datafield value;
1042 use molname line from I<SDFile(s)>; generate a sequential ID with specific prefix; use combination
1043 of both MolName and LabelPrefix with usage of LabelPrefix values for empty molname lines.
1044
1045 Possible values: I<DataField | MolName | LabelPrefix | MolNameOrLabelPrefix>.
1046 Default value: I<LabelPrefix>.
1047
1048 For I<MolNameOrLabelPrefix> value of B<--CompoundIDMode>, molname line in I<SDFile(s)> takes
1049 precedence over sequential compound IDs generated using I<LabelPrefix> and only empty molname
1050 values are replaced with sequential compound IDs.
1051
1052 This is only used for I<CompoundID> value of B<--DataFieldsMode> option.
1053
1054 =item B<--DataFields> I<"FieldLabel1,FieldLabel2,...">
1055
1056 Comma delimited list of I<SDFile(s)> data fields to extract and write to CSV/TSV text file(s) along
1057 with generated fingerprints for I<text | all> values of B<--output> option.
1058
1059 This is only used for I<Specify> value of B<--DataFieldsMode> option.
1060
1061 Examples:
1062
1063 Extreg
1064 MolID,CompoundName
1065
1066 =item B<-d, --DataFieldsMode> I<All | Common | Specify | CompoundID>
1067
1068 Specify how data fields in I<SDFile(s)> are transferred to output CSV/TSV text file(s) along
1069 with generated fingerprints for I<text | all> values of B<--output> option: transfer all SD
1070 data fields; transfer SD data fields common to all compounds; extract specified data fields;
1071 generate a compound ID using molname line, a compound prefix, or a combination of both.
1072 Possible values: I<All | Common | Specify | CompoundID>. Default value: I<CompoundID>.
1073
1074 =item B<-f, --Filter> I<Yes | No>
1075
1076 Specify whether to check and filter compound data in SDFile(s). Possible values: I<Yes or No>.
1077 Default value: I<Yes>.
1078
1079 By default, compound data is checked before calculating fingerprints and compounds containing
1080 atom data corresponding to non-element symbols or no atom data are ignored.
1081
1082 =item B<--FingerprintsLabel> I<text>
1083
1084 SD data label or text file column label to use for fingerprints string in output SD or
1085 CSV/TSV text file(s) specified by B<--output>. Default value: I<TopologicalAtomTorsionsFingerprints>.
1086
1087 =item B<-h, --help>
1088
1089 Print this help message.
1090
1091 =item B<-k, --KeepLargestComponent> I<Yes | No>
1092
1093 Generate fingerprints for only the largest component in molecule. Possible values:
1094 I<Yes or No>. Default value: I<Yes>.
1095
1096 For molecules containing multiple connected components, fingerprints can be generated
1097 in two different ways: use all connected components or just the largest connected
1098 component. By default, all atoms except for the largest connected component are
1099 deleted before generation of fingerprints.
1100
1101 =item B<--OutDelim> I<comma | tab | semicolon>
1102
1103 Delimiter for output CSV/TSV text file(s). Possible values: I<comma, tab, or semicolon>.
1104 Default value: I<comma>.
1105
1106 =item B<--output> I<SD | FP | text | all>
1107
1108 Type of output files to generate. Possible values: I<SD, FP, text, or all>. Default value: I<text>.
1109
1110 =item B<-o, --overwrite>
1111
1112 Overwrite existing files.
1113
1114 =item B<-q, --quote> I<Yes | No>
1115
1116 Put quotes around column values in output CSV/TSV text file(s). Possible values:
1117 I<Yes or No>. Default value: I<Yes>.
1118
1119 =item B<-r, --root> I<RootName>
1120
1121 New file name is generated using the root: <Root>.<Ext>. Default for new file names:
1122 <SDFileName><TopologicalAtomTorsionsFP>.<Ext>. The file type determines <Ext> value.
1123 The sdf, fpf, csv, and tsv <Ext> values are used for SD, FP, comma/semicolon, and tab
1124 delimited text files, respectively. This option is ignored for multiple input files.
1125
1126 =item B<-v, --VectorStringFormat> I<IDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString | ValuesAndIDsPairsString>
1127
1128 Format of fingerprints vector string data in output SD, FP or CSV/TSV text file(s) specified by
1129 B<--output> option. Possible values: I<IDsAndValuesString | IDsAndValuesPairsString | ValuesAndIDsString |
1130 ValuesAndIDsPairsString>. Default value: I<IDsAndValuesString>.
1131
1132 Examples:
1133
1134 FingerprintsVector;TopologicalAtomTorsions:AtomicInvariantsAtomTypes;3
1135 3;NumericalValues;IDsAndValuesString;C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-
1136 C.X3.BO4 C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-N.X3.BO3 C.X2.BO2.H2-C.X2.BO
1137 2.H2-C.X3.BO3.H1-C.X2.BO2.H2 C.X2.BO2.H2-C.X2.BO2.H2-C.X3.BO3.H1-O...;
1138 2 2 1 1 2 2 1 1 3 4 4 8 4 2 2 6 2 2 1 2 1 1 2 1 1 2 6 2 4 2 1 3 1
1139
1140 FingerprintsVector;TopologicalAtomTorsions:AtomicInvariantsAtomTypes;3
1141 3;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3-C.X3.BO3.H1-C.X3
1142 .BO4-C.X3.BO4 2 C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-N.X3.BO3 2 C.X2.BO2.H
1143 2-C.X2.BO2.H2-C.X3.BO3.H1-C.X2.BO2.H2 1 C.X2.BO2.H2-C.X2.BO2.H2-C.X3.B
1144 O3.H1-O.X1.BO1.H1 1 C.X2.BO2.H2-C.X2.BO2.H2-N.X3.BO3-C.X3.BO4 2 C.X2.B
1145 O2.H2-C.X3.BO3.H1-C.X2.BO2.H2-C.X3.BO3.H1 2 C.X2.BO2.H2-C.X3.BO3.H1...
1146
1147 =item B<-w, --WorkingDir> I<DirName>
1148
1149 Location of working directory. Default value: current directory.
1150
1151 =back
1152
1153 =head1 EXAMPLES
1154
1155 To generate topological atom torsions fingerprints using atomic invariants atom types in
1156 IDsAndValuesString format and create a SampleTATFP.csv file containing sequential
1157 compound IDs along with fingerprints vector strings data, type:
1158
1159 % TopologicalAtomTorsionsFingerprints.pl -r SampleTATFP -o Sample.sdf
1160
1161 To generate topological atom torsions fingerprints using atomic invariants atom types in
1162 IDsAndValuesString format and create SampleTATFP.sdf, SampleTATFP.fpf and SampleTATFP.csv
1163 files containing sequential compound IDs in CSV file along with fingerprints vector strings
1164 data, type:
1165
1166 % TopologicalAtomTorsionsFingerprints.pl --output all -r SampleTATFP
1167 -o Sample.sdf
1168
1169 To generate topological atom torsions fingerprints using atomic invariants atom types in
1170 IDsAndValuesPairsString format and create a SampleTATFP.csv file containing sequential
1171 compound IDs along with fingerprints vector strings data, type:
1172
1173 % TopologicalAtomTorsionsFingerprints.pl --VectorStringFormat
1174 IDsAndValuesPairsString -r SampleTATFP -o Sample.sdf
1175
1176 To generate topological atom torsions fingerprints using DREIDING atom types in
1177 IDsAndValuesString format and create a SampleTATFP.csv file containing sequential
1178 compound IDs along with fingerprints vector strings data, type:
1179
1180 % TopologicalAtomTorsionsFingerprints.pl -a DREIDINGAtomTypes
1181 -r SampleTATFP -o Sample.sdf
1182
1183 To generate topological atom torsions fingerprints using E-state atom types in
1184 IDsAndValuesString format and create a SampleTATFP.csv file containing sequential
1185 compound IDs along with fingerprints vector strings data, type:
1186
1187 % TopologicalAtomTorsionsFingerprints.pl -a EStateAtomTypes
1188 -r SampleTATFP -o Sample.sdf
1189
1190 To generate topological atom torsions fingerprints using functional class atom types in
1191 IDsAndValuesString format and create a SampleTATFP.csv file containing sequential
1192 compound IDs along with fingerprints vector strings data, type:
1193
1194 % TopologicalAtomTorsionsFingerprints.pl -a FunctionalClassAtomTypes
1195 -r SampleTATFP -o Sample.sdf
1196
1197 To generate topological atom torsions fingerprints using MMFF94 atom types in
1198 IDsAndValuesString format and create a SampleTATFP.csv file containing sequential
1199 compound IDs along with fingerprints vector strings data, type:
1200
1201 % TopologicalAtomTorsionsFingerprints.pl -a MMFF94AtomTypes
1202 -r SampleTATFP -o Sample.sdf
1203
1204 To generate topological atom torsions fingerprints using SLogP atom types in
1205 IDsAndValuesString format and create a SampleTATFP.csv file containing sequential
1206 compound IDs along with fingerprints vector strings data, type:
1207
1208 % TopologicalAtomTorsionsFingerprints.pl -a SLogPAtomTypes
1209 -r SampleTATFP -o Sample.sdf
1210
1211 To generate topological atom torsions fingerprints using SYBYL atom types in
1212 IDsAndValuesString format and create a SampleTATFP.csv file containing sequential
1213 compound IDs along with fingerprints vector strings data, type:
1214
1215 % TopologicalAtomTorsionsFingerprints.pl -a SYBYLAtomTypes
1216 -r SampleTATFP -o Sample.sdf
1217
1218 To generate topological atom torsions fingerprints using TPSA atom types in
1219 IDsAndValuesString format and create a SampleTATFP.csv file containing sequential
1220 compound IDs along with fingerprints vector strings data, type:
1221
1222 % TopologicalAtomTorsionsFingerprints.pl -a TPSAAtomTypes
1223 -r SampleTATFP -o Sample.sdf
1224
1225 To generate topological atom torsions fingerprints using UFF atom types in
1226 IDsAndValuesString format and create a SampleTATFP.csv file containing sequential
1227 compound IDs along with fingerprints vector strings data, type:
1228
1229 % TopologicalAtomTorsionsFingerprints.pl -a UFFAtomTypes
1230 -r SampleTATFP -o Sample.sdf
1231
1232 To generate topological atom torsions fingerprints using only AS,X atomic invariants atom
1233 types in IDsAndValuesString format and create a SampleTATFP.csv file containing sequential
1234 compound IDs along with fingerprints vector strings data, type:
1235
1236 % TopologicalAtomTorsionsFingerprints.pl -a AtomicInvariantsAtomTypes
1237 --AtomicInvariantsToUse "AS,X" -r SampleTATFP -o Sample.sdf
1238
1239 To generate topological atom torsions fingerprints using atomic invariants atom types in
1240 IDsAndValuesString format and create a SampleTATFP.csv file containing compoundID
1241 from molecule name line along with fingerprints vector strings, type:
1242
1243 % TopologicalAtomTorsionsFingerprints.pl -a AtomicInvariantsAtomTypes
1244 --DataFieldsMode CompoundID --CompoundIDMode MolName
1245 -r SampleTATFP -o Sample.sdf
1246
1247 To generate topological atom torsions fingerprints using atomic invariants atom types in
1248 IDsAndValuesString format and create a SampleTATFP.csv file containing compound IDs
1249 using specified data field along with fingerprints vector strings, type:
1250
1251 % TopologicalAtomTorsionsFingerprints.pl -a AtomicInvariantsAtomTypes
1252 --DataFieldsMode CompoundID --CompoundIDMode DataField --CompoundID
1253 Mol_ID -r SampleTATFP -o Sample.sdf
1254
1255 To generate topological atom torsions fingerprints using atomic invariants atom types in
1256 IDsAndValuesString format and create a SampleTATFP.csv file containing compound ID
1257 using combination of molecule name line and an explicit compound prefix along with
1258 fingerprints vector strings data, type:
1259
1260 % TopologicalAtomTorsionsFingerprints.pl -a AtomicInvariantsAtomTypes
1261 --DataFieldsMode CompoundID --CompoundIDMode MolNameOrLabelPrefix
1262 --CompoundID Cmpd --CompoundIDLabel MolID -r SampleTATFP -o Sample.sdf
1263
1264 To generate topological atom torsions fingerprints using atomic invariants atom types in
1265 IDsAndValuesString format and create a SampleTATFP.csv file containing specific data
1266 fields columns along with fingerprints vector strings, type:
1267
1268 % TopologicalAtomTorsionsFingerprints.pl -a AtomicInvariantsAtomTypes
1269 --DataFieldsMode Specify --DataFields Mol_ID -r SampleTATFP
1270 -o Sample.sdf
1271
1272 To generate topological atom torsions fingerprints using atomic invariants atom types in
1273 IDsAndValuesString format and create a SampleTATFP.csv file containing common
1274 data fields columns along with fingerprints vector strings, type:
1275
1276 % TopologicalAtomTorsionsFingerprints.pl -a AtomicInvariantsAtomTypes
1277 --DataFieldsMode Common -r SampleTATFP -o Sample.sdf
1278
1279 To generate topological atom torsions fingerprints using atomic invariants atom types in
1280 IDsAndValuesString format and create SampleTATFP.sdf, SampleTATFP.fpf and SampleTATFP.csv
1281 files containing all data fields columns in CSV file along with fingerprints data, type:
1282
1283 % TopologicalAtomTorsionsFingerprints.pl -a AtomicInvariantsAtomTypes
1284 --DataFieldsMode All --output all -r SampleTATFP
1285 -o Sample.sdf
1286
1287
1288 =head1 AUTHOR
1289
1290 Manish Sud <msud@san.rr.com>
1291
1292 =head1 SEE ALSO
1293
1294 InfoFingerprintsFiles.pl, SimilarityMatricesFingerprints.pl, AtomNeighborhoodsFingerprints.pl,
1295 ExtendedConnectivityFingerprints.pl, MACCSKeysFingerprints.pl,
1296 PathLengthFingerprints.pl, TopologicalAtomPairsFingerprints.pl,
1297 TopologicalPharmacophoreAtomPairsFingerprints.pl, TopologicalPharmacophoreAtomTripletsFingerprints.pl
1298
1299 =head1 COPYRIGHT
1300
1301 Copyright (C) 2015 Manish Sud. All rights reserved.
1302
1303 This file is part of MayaChemTools.
1304
1305 MayaChemTools is free software; you can redistribute it and/or modify it under
1306 the terms of the GNU Lesser General Public License as published by the Free
1307 Software Foundation; either version 3 of the License, or (at your option)
1308 any later version.
1309
1310 =cut