comparison bin/AtomNeighborhoodsFingerprints.pl @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 #!/usr/bin/perl -w
2 #
3 # $RCSfile: AtomNeighborhoodsFingerprints.pl,v $
4 # $Date: 2015/02/28 20:46:19 $
5 # $Revision: 1.31 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use FindBin; use lib "$FindBin::Bin/../lib";
31 use Getopt::Long;
32 use File::Basename;
33 use Text::ParseWords;
34 use Benchmark;
35 use FileUtil;
36 use TextUtil;
37 use SDFileUtil;
38 use MoleculeFileIO;
39 use FileIO::FingerprintsSDFileIO;
40 use FileIO::FingerprintsTextFileIO;
41 use FileIO::FingerprintsFPFileIO;
42 use AtomTypes::AtomicInvariantsAtomTypes;
43 use AtomTypes::FunctionalClassAtomTypes;
44 use Fingerprints::AtomNeighborhoodsFingerprints;
45
my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Turn on autoflush for STDOUT so progress messages show up right away...
$| = 1;

# Announce the start of the run and start the clock...
$ScriptName = basename($0);
print "\n$ScriptName: Starting...\n\n";
$StartTime = Benchmark->new();

# Retrieve command line options; show usage and quit when help is requested
# or no input file is specified...
SetupScriptUsage();
die GetUsageFromPod("$FindBin::Bin/$ScriptName") if ($Options{help} || @ARGV < 1);

# Expand command line arguments into a list of SD files...
my(@SDFilesList) = ExpandFileNames(\@ARGV, "sdf sd");

# Validate and cache option values...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Collect information about input files and their output file names...
print "Checking input SD file(s)...\n";
my(%SDFilesInfo);
RetrieveSDFilesInfo();

# Generate fingerprints for each input file which passed the checks...
print "\nProcessing SD files...\n" if (@SDFilesList > 1);

my($FileIndex);
FILE: for $FileIndex (0 .. $#SDFilesList) {
  next FILE unless $SDFilesInfo{FileOkay}[$FileIndex];

  print "\nProcessing file $SDFilesList[$FileIndex]...\n";
  GenerateAtomNeighborhoodsFingerprints($FileIndex);
}
print "\n$ScriptName:Done...\n\n";

# Report the elapsed time...
$EndTime = Benchmark->new();
$TotalTime = timediff($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";
91
92 ###############################################################################
93
# Generate fingerprints for all compounds in the SD file at the specified index
# of the input files list, write them to the requested output files and print
# out summary statistics...
#
sub GenerateAtomNeighborhoodsFingerprints {
  my($FileIndex) = @_;
  my($InputSDFile, $Reader, $Mol, $Fingerprints, $SDWriter, $TextWriter, $FPWriter);
  my($TotalCount, $SkippedCount) = (0, 0);

  $InputSDFile = $SDFilesList[$FileIndex];

  # Writers for output types which were not requested stay undefined...
  ($SDWriter, $TextWriter, $FPWriter) = SetupAndOpenOutputFiles($FileIndex);

  $Reader = MoleculeFileIO->new('Name' => $InputSDFile);
  $Reader->Open();

  COMPOUND: while ($Mol = $Reader->ReadMolecule()) {
    $TotalCount++;

    # Optionally filter out compounds before calculating fingerprints...
    if ($OptionsInfo{Filter} && CheckAndFilterCompound($TotalCount, $Mol)) {
      $SkippedCount++;
      next COMPOUND;
    }

    $Fingerprints = GenerateMoleculeFingerprints($Mol);
    unless ($Fingerprints) {
      $SkippedCount++;
      ProcessIgnoredCompound('FingerprintsGenerationFailed', $TotalCount, $Mol);
      next COMPOUND;
    }

    WriteDataToOutputFiles($FileIndex, $TotalCount, $Mol, $Fingerprints, $SDWriter, $TextWriter, $FPWriter);
  }
  $Reader->Close();

  # Close whichever output files were opened...
  for my $Writer ($SDWriter, $TextWriter, $FPWriter) {
    $Writer->Close() if $Writer;
  }

  WriteFingerprintsGenerationSummaryStatistics($TotalCount, $SkippedCount);
}
146
# Warn about a compound being ignored due to problems in fingerprints
# generation. Any mode other than ContainsNonElementalData and
# ContainsNoElementalData is reported as a fingerprints generation failure...
#
sub ProcessIgnoredCompound {
  my($Mode, $CmpdCount, $Molecule) = @_;
  my($CmpdID, $DataFieldsRef);

  $DataFieldsRef = $Molecule->GetDataFieldLabelAndValues();
  $CmpdID = SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldsRef);

  if ($Mode =~ /^ContainsNonElementalData$/i) {
    warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Compound contains atom data corresponding to non-elemental atom symbol(s)...\n\n";
  }
  elsif ($Mode =~ /^ContainsNoElementalData$/i) {
    warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Compound contains no atom data...\n\n";
  }
  else {
    # FingerprintsGenerationFailed along with any unrecognized mode...
    warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Fingerprints generation didn't succeed...\n\n";
  }
}
174
# Check whether a compound needs to be filtered out: returns 1, after issuing
# a warning, for compounds containing non-elemental atom data or no elemental
# atom data at all; otherwise, returns 0...
#
sub CheckAndFilterCompound {
  my($CmpdCount, $Molecule) = @_;
  my($NumOfElements, $NumOfNonElements, $IgnoreMode);

  ($NumOfElements, $NumOfNonElements) = $Molecule->GetNumOfElementsAndNonElements();

  $IgnoreMode = '';
  if ($NumOfNonElements) {
    $IgnoreMode = 'ContainsNonElementalData';
  }
  elsif (!$NumOfElements) {
    $IgnoreMode = 'ContainsNoElementalData';
  }

  return 0 unless $IgnoreMode;

  ProcessIgnoredCompound($IgnoreMode, $CmpdCount, $Molecule);
  return 1;
}
195
# Print total, successfully processed and ignored compound counts for
# fingerprints generation...
#
sub WriteFingerprintsGenerationSummaryStatistics {
  my($CmpdCount, $IgnoredCmpdCount) = @_;

  # Compounds which were not ignored are the ones processed successfully...
  my($ProcessedCmpdCount) = ($CmpdCount - $IgnoredCmpdCount);

  print "\nNumber of compounds: $CmpdCount\n";
  print "Number of compounds processed successfully during fingerprints generation: $ProcessedCmpdCount\n";
  print "Number of compounds ignored during fingerprints generation: $IgnoredCmpdCount\n";
}
208
# Create and open fingerprints file IO objects for the requested output types.
#
# Returns a list containing SD, text and FP file IO objects in that order;
# entries for output types which were not requested are undefined...
#
sub SetupAndOpenOutputFiles {
  my($FileIndex) = @_;
  my($SDWriter, $TextWriter, $FPWriter) = (undef, undef, undef);

  # Parameters shared by all fingerprints file IO objects...
  my(%CommonIOParams) = (
    'Mode' => 'Write',
    'Overwrite' => $OptionsInfo{OverwriteFiles},
    'FingerprintsStringMode' => 'FingerprintsVectorString',
    'VectorStringFormat' => $OptionsInfo{VectorStringFormat},
  );

  if ($OptionsInfo{SDOutput}) {
    my($NewFPSDFile) = $SDFilesInfo{SDOutFileNames}[$FileIndex];

    print "Generating SD file $NewFPSDFile...\n";
    $SDWriter = FileIO::FingerprintsSDFileIO->new('Name' => $NewFPSDFile, %CommonIOParams, 'FingerprintsFieldLabel' => $OptionsInfo{FingerprintsLabel});
    $SDWriter->Open();
  }

  if ($OptionsInfo{FPOutput}) {
    my($NewFPFile) = $SDFilesInfo{FPOutFileNames}[$FileIndex];

    print "Generating FP file $NewFPFile...\n";
    $FPWriter = FileIO::FingerprintsFPFileIO->new('Name' => $NewFPFile, %CommonIOParams);
    $FPWriter->Open();
  }

  if ($OptionsInfo{TextOutput}) {
    my($NewFPTextFile) = $SDFilesInfo{TextOutFileNames}[$FileIndex];
    my($ColLabelsRef) = SetupFPTextFileCoulmnLabels($FileIndex);

    print "Generating text file $NewFPTextFile...\n";
    $TextWriter = FileIO::FingerprintsTextFileIO->new('Name' => $NewFPTextFile, %CommonIOParams, 'DataColLabels' => $ColLabelsRef, 'OutDelim' => $OptionsInfo{OutDelim}, 'OutQuote' => $OptionsInfo{OutQuote});
    $TextWriter->Open();
  }

  return ($SDWriter, $TextWriter, $FPWriter);
}
248
# Write fingerprints and other compound data to whichever output files are
# open; undefined file IO objects are skipped...
#
sub WriteDataToOutputFiles {
  my($FileIndex, $CmpdCount, $Molecule, $AtomNeighborhoodsFingerprints, $NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO) = @_;

  # Data fields are needed only for text and FP output...
  my $DataFieldsRef = ($NewFPTextFileIO || $NewFPFileIO) ? $Molecule->GetDataFieldLabelAndValues() : undef;

  if ($NewFPSDFileIO) {
    my $InputCmpdString = $Molecule->GetInputMoleculeString();
    $NewFPSDFileIO->WriteFingerprints($AtomNeighborhoodsFingerprints, $InputCmpdString);
  }

  if ($NewFPTextFileIO) {
    my $RowValuesRef = SetupFPTextFileCoulmnValues($FileIndex, $CmpdCount, $Molecule, $DataFieldsRef);
    $NewFPTextFileIO->WriteFingerprints($AtomNeighborhoodsFingerprints, $RowValuesRef);
  }

  if ($NewFPFileIO) {
    my $ID = SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldsRef);
    $NewFPFileIO->WriteFingerprints($AtomNeighborhoodsFingerprints, $ID);
  }
}
281
# Generate appropriate column labels for FPText output file: data field or
# compound ID labels selected by data fields mode followed by the
# fingerprints label. Returns a reference to the list of labels...
#
sub SetupFPTextFileCoulmnLabels {
  my($FileIndex) = @_;
  my($Mode, @Labels);

  $Mode = $OptionsInfo{DataFieldsMode};

  @Labels = ();
  if ($Mode =~ /^All$/i) {
    push @Labels, @{$SDFilesInfo{AllDataFieldsRef}[$FileIndex]};
  }
  elsif ($Mode =~ /^Common$/i) {
    push @Labels, @{$SDFilesInfo{CommonDataFieldsRef}[$FileIndex]};
  }
  elsif ($Mode =~ /^Specify$/i) {
    push @Labels, @{$OptionsInfo{SpecifiedDataFields}};
  }
  elsif ($Mode =~ /^CompoundID$/i) {
    push @Labels, $OptionsInfo{CompoundIDLabel};
  }

  # Fingerprints label is always the last column...
  push @Labels, $OptionsInfo{FingerprintsLabel};

  return \@Labels;
}
306
# Generate column values for FPText output file: either the compound ID or
# the values of the data fields selected by data fields mode, with an empty
# string standing in for any missing data field. Returns a reference to the
# list of values...
#
sub SetupFPTextFileCoulmnValues {
  my($FileIndex, $CmpdCount, $Molecule, $DataFieldLabelAndValuesRef) = @_;
  my($Mode, $FieldLabelsRef, @Values);

  $Mode = $OptionsInfo{DataFieldsMode};

  if ($Mode =~ /^CompoundID$/i) {
    return [ SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef) ];
  }

  # Pick the list of data field labels corresponding to the mode...
  if ($Mode =~ /^All$/i) {
    $FieldLabelsRef = $SDFilesInfo{AllDataFieldsRef}[$FileIndex];
  }
  elsif ($Mode =~ /^Common$/i) {
    $FieldLabelsRef = $SDFilesInfo{CommonDataFieldsRef}[$FileIndex];
  }
  elsif ($Mode =~ /^Specify$/i) {
    $FieldLabelsRef = $OptionsInfo{SpecifiedDataFields};
  }
  else {
    return [];
  }

  @Values = map { exists $DataFieldLabelAndValuesRef->{$_} ? $DataFieldLabelAndValuesRef->{$_} : '' } @{$FieldLabelsRef};

  return \@Values;
}
329
# Generate compound ID for FP and FPText output files using the compound ID
# mode: molecule name, label prefix followed by compound number, or value of
# a specified data field. Returns an empty string for an unrecognized mode or
# a missing data field...
#
sub SetupCmpdIDForOutputFiles {
  my($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef) = @_;
  my($IDMode, $ID);

  $IDMode = $OptionsInfo{CompoundIDMode};
  $ID = '';

  if ($IDMode =~ /^MolNameOrLabelPrefix$/i) {
    # Fall back to label prefix for molecules without a name...
    my $Name = $Molecule->GetName();
    $ID = "$OptionsInfo{CompoundID}${CmpdCount}";
    if ($Name) {
      $ID = $Name;
    }
  }
  elsif ($IDMode =~ /^LabelPrefix$/i) {
    $ID = "$OptionsInfo{CompoundID}${CmpdCount}";
  }
  elsif ($IDMode =~ /^DataField$/i) {
    my $FieldLabel = $OptionsInfo{CompoundID};
    if (exists $DataFieldLabelAndValuesRef->{$FieldLabel}) {
      $ID = $DataFieldLabelAndValuesRef->{$FieldLabel};
    }
  }
  elsif ($IDMode =~ /^MolName$/i) {
    $ID = $Molecule->GetName();
  }

  return $ID;
}
355
# Generate fingerprints for a molecule. Returns the fingerprints object, or
# undef when ring detection or fingerprints generation doesn't succeed...
#
sub GenerateMoleculeFingerprints {
  my($Molecule) = @_;
  my($Fingerprints);

  $Molecule->KeepLargestComponent() if $OptionsInfo{KeepLargestComponent};

  return undef unless $Molecule->DetectRings();

  $Molecule->SetAromaticityModel($OptionsInfo{AromaticityModel});
  $Molecule->DetectAromaticity();

  $Fingerprints = Fingerprints::AtomNeighborhoodsFingerprints->new(
    'Molecule' => $Molecule,
    'MinNeighborhoodRadius' => $OptionsInfo{MinNeighborhoodRadius},
    'MaxNeighborhoodRadius' => $OptionsInfo{MaxNeighborhoodRadius},
    'AtomIdentifierType' => $OptionsInfo{AtomIdentifierType},
  );
  SetAtomIdentifierTypeValuesToUse($Fingerprints);

  $Fingerprints->GenerateFingerprints();

  # Hand back the object only after a successful generation...
  return $Fingerprints->IsFingerprintsGenerationSuccessful() ? $Fingerprints : undef;
}
384
# Pass the atomic invariants or functional classes specified on the command
# line to the fingerprints object; the other supported atom identifier types
# need no additional setup. Dies for an unsupported atom identifier type...
#
sub SetAtomIdentifierTypeValuesToUse {
  my($AtomNeighborhoodsFingerprints) = @_;
  my($IdentifierType);

  $IdentifierType = $OptionsInfo{AtomIdentifierType};

  if ($IdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
    $AtomNeighborhoodsFingerprints->SetAtomicInvariantsToUse(\@{$OptionsInfo{AtomicInvariantsToUse}});
    return;
  }
  if ($IdentifierType =~ /^FunctionalClassAtomTypes$/i) {
    $AtomNeighborhoodsFingerprints->SetFunctionalClassesToUse(\@{$OptionsInfo{FunctionalClassesToUse}});
    return;
  }
  if ($IdentifierType =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    # Nothing to do for now...
    return;
  }

  die "Error: The value specified, $Options{atomidentifiertype}, for option \"-a, --AtomIdentifierType\" is not valid. Supported atom identifier types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes\n";
}
403
# Retrieve information about SD files and populate %SDFilesInfo.
#
# For each file in @SDFilesList, the following per-file entries are set up:
# FileOkay flag, OutFileRoot, SD/FP/text output file names and references to
# all and common data field labels. A file is marked okay only after it passes
# all the checks: it exists, has a SD file extension, contains the data field
# needed for compound IDs (when required), doesn't clash with its own SD output
# file name, and none of its output files exist unless overwrite is specified.
# Files failing a check are skipped with a warning.
#
sub RetrieveSDFilesInfo {
  my($SDFile, $Index, $FileDir, $FileExt, $FileName, $OutFileRoot, $TextOutFileExt, $SDOutFileExt, $FPOutFileExt, $NewSDFileName, $NewFPFileName, $NewTextFileName, $CheckDataField, $CollectDataFields, $AllDataFieldsRef, $CommonDataFieldsRef);

  %SDFilesInfo = ();
  @{$SDFilesInfo{FileOkay}} = ();
  @{$SDFilesInfo{OutFileRoot}} = ();
  @{$SDFilesInfo{SDOutFileNames}} = ();
  @{$SDFilesInfo{FPOutFileNames}} = ();
  @{$SDFilesInfo{TextOutFileNames}} = ();
  @{$SDFilesInfo{AllDataFieldsRef}} = ();
  @{$SDFilesInfo{CommonDataFieldsRef}} = ();

  # Data field used as compound ID needs to be verified only for text output...
  $CheckDataField = ($OptionsInfo{TextOutput} && ($OptionsInfo{DataFieldsMode} =~ /^CompoundID$/i) && ($OptionsInfo{CompoundIDMode} =~ /^DataField$/i)) ? 1 : 0;

  # Data field labels are needed only to set up columns for text output...
  $CollectDataFields = ($OptionsInfo{TextOutput} && ($OptionsInfo{DataFieldsMode} =~ /^(All|Common)$/i)) ? 1 : 0;

  FILELIST: for $Index (0 .. $#SDFilesList) {
    $SDFile = $SDFilesList[$Index];

    # Assume the worst until all the checks have passed...
    $SDFilesInfo{FileOkay}[$Index] = 0;
    $SDFilesInfo{OutFileRoot}[$Index] = '';
    $SDFilesInfo{SDOutFileNames}[$Index] = '';
    $SDFilesInfo{FPOutFileNames}[$Index] = '';
    $SDFilesInfo{TextOutFileNames}[$Index] = '';

    if (!(-e $SDFile)) {
      warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
      next FILELIST;
    }
    if (!CheckFileType($SDFile, "sd sdf")) {
      warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
      next FILELIST;
    }

    if ($CheckDataField) {
      # Make sure data field exists in the first compound of SD file..
      my($SDFileHandle, $CmpdString, $SpecifiedDataField, @CmpdLines, %DataFieldValues);

      @CmpdLines = ();
      # Three-argument open keeps special characters in file names from being
      # interpreted as an open mode...
      open $SDFileHandle, '<', $SDFile or die "Error: Couldn't open $SDFile: $! \n";
      $CmpdString = ReadCmpdString($SDFileHandle);
      close $SDFileHandle;
      @CmpdLines = split "\n", $CmpdString;
      %DataFieldValues = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
      $SpecifiedDataField = $OptionsInfo{CompoundID};
      if (!exists $DataFieldValues{$SpecifiedDataField}) {
        warn "Warning: Ignoring file $SDFile: Data field value, $SpecifiedDataField, using \"--CompoundID\" option in \"DataField\" \"--CompoundIDMode\" doesn't exist\n";
        next FILELIST;
      }
    }

    $AllDataFieldsRef = '';
    $CommonDataFieldsRef = '';
    if ($CollectDataFields) {
      my($SDFileHandle, $CmpdCount);
      open $SDFileHandle, '<', $SDFile or die "Error: Couldn't open $SDFile: $! \n";
      ($CmpdCount, $AllDataFieldsRef, $CommonDataFieldsRef) = GetAllAndCommonCmpdDataHeaderLabels($SDFileHandle);
      close $SDFileHandle;
    }

    # Setup output file names...
    $FileDir = ""; $FileName = ""; $FileExt = "";
    ($FileDir, $FileName, $FileExt) = ParseFileName($SDFile);

    $TextOutFileExt = "csv";
    if ($OptionsInfo{OutDelim} =~ /^tab$/i) {
      $TextOutFileExt = "tsv";
    }
    $SDOutFileExt = $FileExt;
    $FPOutFileExt = "fpf";

    # A user specified root name is used only for a single input file...
    if ($OptionsInfo{OutFileRoot} && (@SDFilesList == 1)) {
      my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
      if ($RootFileName && $RootFileExt) {
        $FileName = $RootFileName;
      }
      else {
        $FileName = $OptionsInfo{OutFileRoot};
      }
      $OutFileRoot = $FileName;
    }
    else {
      $OutFileRoot = "${FileName}AtomNeighborhoodsFP";
    }

    $NewSDFileName = "${OutFileRoot}.${SDOutFileExt}";
    $NewFPFileName = "${OutFileRoot}.${FPOutFileExt}";
    $NewTextFileName = "${OutFileRoot}.${TextOutFileExt}";

    if ($OptionsInfo{SDOutput}) {
      # Quote the output file name so that it's matched literally: characters
      # such as "." and "+" in file names are not regex metacharacters here...
      if ($SDFile =~ /\Q$NewSDFileName\E/i) {
        warn "Warning: Ignoring input file $SDFile: Same output, $NewSDFileName, and input file names.\n";
        print "Specify a different name using \"-r --root\" option or use default name.\n";
        next FILELIST;
      }
    }

    if (!$OptionsInfo{OverwriteFiles}) {
      # Check SD, FP and text output files...
      if ($OptionsInfo{SDOutput}) {
        if (-e $NewSDFileName) {
          warn "Warning: Ignoring file $SDFile: The file $NewSDFileName already exists\n";
          next FILELIST;
        }
      }
      if ($OptionsInfo{FPOutput}) {
        if (-e $NewFPFileName) {
          warn "Warning: Ignoring file $SDFile: The file $NewFPFileName already exists\n";
          next FILELIST;
        }
      }
      if ($OptionsInfo{TextOutput}) {
        if (-e $NewTextFileName) {
          warn "Warning: Ignoring file $SDFile: The file $NewTextFileName already exists\n";
          next FILELIST;
        }
      }
    }

    $SDFilesInfo{FileOkay}[$Index] = 1;

    $SDFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
    $SDFilesInfo{SDOutFileNames}[$Index] = $NewSDFileName;
    $SDFilesInfo{FPOutFileNames}[$Index] = $NewFPFileName;
    $SDFilesInfo{TextOutFileNames}[$Index] = $NewTextFileName;

    $SDFilesInfo{AllDataFieldsRef}[$Index] = $AllDataFieldsRef;
    $SDFilesInfo{CommonDataFieldsRef}[$Index] = $CommonDataFieldsRef;
  }
}
536
# Process option values retrieved from the command line and cache them in
# %OptionsInfo for use during fingerprints generation. Dies when a value
# required by the specified compound ID or data fields mode is missing...
#
sub ProcessOptions {
  my($DataFieldsMode, $CompoundIDMode, $Output);

  %OptionsInfo = ();

  ProcessAtomIdentifierTypeOptions();

  $DataFieldsMode = $Options{datafieldsmode};
  $CompoundIDMode = $Options{compoundidmode};
  $Output = $Options{output};

  $OptionsInfo{AromaticityModel} = $Options{aromaticitymodel};

  $OptionsInfo{CompoundIDMode} = $CompoundIDMode;
  $OptionsInfo{CompoundIDLabel} = $Options{compoundidlabel};
  $OptionsInfo{DataFieldsMode} = $DataFieldsMode;

  # Defaults used when the data fields mode doesn't call for these values...
  $OptionsInfo{SpecifiedDataFields} = [];
  $OptionsInfo{CompoundID} = '';

  if ($DataFieldsMode =~ /^CompoundID$/i) {
    if ($CompoundIDMode =~ /^DataField$/i) {
      if (!$Options{compoundid}) {
        die "Error: You must specify a value for \"--CompoundID\" option in \"DataField\" \"--CompoundIDMode\". \n";
      }
      $OptionsInfo{CompoundID} = $Options{compoundid};
    }
    elsif ($CompoundIDMode =~ /^(LabelPrefix|MolNameOrLabelPrefix)$/i) {
      $OptionsInfo{CompoundID} = $Options{compoundid} ? $Options{compoundid} : 'Cmpd';
    }
  }
  elsif ($DataFieldsMode =~ /^Specify$/i) {
    if (!$Options{datafields}) {
      die "Error: You must specify a value for \"--DataFields\" option in \"Specify\" \"-d, --DataFieldsMode\". \n";
    }
    push @{$OptionsInfo{SpecifiedDataFields}}, split(/\,/, $Options{datafields});
  }

  $OptionsInfo{FingerprintsLabel} = $Options{fingerprintslabel} ? $Options{fingerprintslabel} : 'AtomNeighborhoodsFingerprints';

  # Convert Yes/No values into flags...
  $OptionsInfo{Filter} = ($Options{filter} =~ /^Yes$/i) ? 1 : 0;
  $OptionsInfo{KeepLargestComponent} = ($Options{keeplargestcomponent} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{MinNeighborhoodRadius} = $Options{minneighborhoodradius};
  $OptionsInfo{MaxNeighborhoodRadius} = $Options{maxneighborhoodradius};

  # Value "All" turns on every output type...
  $OptionsInfo{Output} = $Output;
  $OptionsInfo{SDOutput} = ($Output =~ /^(SD|All)$/i) ? 1 : 0;
  $OptionsInfo{FPOutput} = ($Output =~ /^(FP|All)$/i) ? 1 : 0;
  $OptionsInfo{TextOutput} = ($Output =~ /^(Text|All)$/i) ? 1 : 0;

  $OptionsInfo{OutDelim} = $Options{outdelim};
  $OptionsInfo{OutQuote} = ($Options{quote} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
  $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;

  $OptionsInfo{VectorStringFormat} = 'ValuesString';
}
596
# Cache atom identifier type and process the options which go along with it:
# atomic invariants or functional classes to use. Dies for an unsupported atom
# identifier type...
#
sub ProcessAtomIdentifierTypeOptions {
  my($IdentifierType);

  $IdentifierType = $Options{atomidentifiertype};
  $OptionsInfo{AtomIdentifierType} = $IdentifierType;

  if ($IdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
    ProcessAtomicInvariantsToUseOption();
    return;
  }
  if ($IdentifierType =~ /^FunctionalClassAtomTypes$/i) {
    ProcessFunctionalClassesToUse();
    return;
  }
  if ($IdentifierType =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    # Nothing to do for now...
    return;
  }

  die "Error: The value specified, $Options{atomidentifiertype}, for option \"-a, --AtomIdentifierType\" is not valid. Supported atom identifier types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes\n";
}
616
# Validate the comma delimited atomic invariants specified on the command line
# and cache them in %OptionsInfo. Dies when the value is empty, contains an
# unavailable atomic invariant or is missing atom symbol (AS or AtomSymbol)...
#
sub ProcessAtomicInvariantsToUseOption {
  my($Invariant, $SawAtomSymbol);

  $OptionsInfo{AtomicInvariantsToUse} = [];

  die "Error: Atomic invariants value specified using \"--AtomicInvariantsToUse\" option is empty\n" if IsEmpty($Options{atomicinvariantstouse});

  $SawAtomSymbol = 0;
  for $Invariant (split /\,/, $Options{atomicinvariantstouse}) {
    unless (AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($Invariant)) {
      die "Error: Atomic invariant specified, $Invariant, using \"--AtomicInvariantsToUse\" option is not valid...\n ";
    }
    $SawAtomSymbol = 1 if ($Invariant =~ /^(AS|AtomSymbol)$/i);
    push @{$OptionsInfo{AtomicInvariantsToUse}}, $Invariant;
  }

  unless ($SawAtomSymbol) {
    die "Error: Atomic invariant, AS or AtomSymbol, must be specified as using \"--AtomicInvariantsToUse\" option...\n ";
  }
}
641
# Validate the comma delimited functional classes specified on the command
# line and cache them in %OptionsInfo. Dies when the value is empty or
# contains an unavailable functional class...
#
sub ProcessFunctionalClassesToUse {
  my($ClassName);

  $OptionsInfo{FunctionalClassesToUse} = [];

  die "Error: Functional classes value specified using \"--FunctionalClassesToUse\" option is empty\n" if IsEmpty($Options{functionalclassestouse});

  for $ClassName (split /\,/, $Options{functionalclassestouse}) {
    unless (AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($ClassName)) {
      die "Error: Functional class specified, $ClassName, using \"--FunctionalClassesToUse\" option is not valid...\n ";
    }
    push @{$OptionsInfo{FunctionalClassesToUse}}, $ClassName;
  }
}
659
# Setup script usage and retrieve command line arguments specified using
# various options.
#
# Default values are assigned to %Options before parsing the command line, so
# every option checked below always has a value. After parsing, the working
# directory is changed when "-w, --workingdir" is specified and each option
# value is validated; the script dies with an error message on the first
# invalid value.
#
sub SetupScriptUsage {

  # Retrieve all the options...
  %Options = ();

  $Options{aromaticitymodel} = 'MayaChemToolsAromaticityModel';

  $Options{atomidentifiertype} = 'AtomicInvariantsAtomTypes';
  $Options{atomicinvariantstouse} = 'AS,X,BO,H,FC';
  $Options{functionalclassestouse} = 'HBD,HBA,PI,NI,Ar,Hal';

  $Options{compoundidmode} = 'LabelPrefix';
  $Options{compoundidlabel} = 'CompoundID';
  $Options{datafieldsmode} = 'CompoundID';

  $Options{filter} = 'Yes';

  $Options{keeplargestcomponent} = 'Yes';

  $Options{minneighborhoodradius} = 0;
  $Options{maxneighborhoodradius} = 2;

  $Options{output} = 'text';
  $Options{outdelim} = 'comma';
  $Options{quote} = 'yes';

  if (!GetOptions(\%Options, "aromaticitymodel=s", "atomidentifiertype|a=s", "atomicinvariantstouse=s", "functionalclassestouse=s", "compoundid=s", "compoundidlabel=s", "compoundidmode=s", "datafields=s", "datafieldsmode|d=s", "filter|f=s", "fingerprintslabel=s", "help|h", "keeplargestcomponent|k=s", "minneighborhoodradius=s", "maxneighborhoodradius=s", "outdelim=s", "output=s", "overwrite|o", "quote|q=s", "root|r=s", "workingdir|w=s")) {
    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
  }
  if ($Options{workingdir}) {
    if (! -d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }
  if (!Molecule::IsSupportedAromaticityModel($Options{aromaticitymodel})) {
    my(@SupportedModels) = Molecule::GetSupportedAromaticityModels();
    die "Error: The value specified, $Options{aromaticitymodel}, for option \"--AromaticityModel\" is not valid. Supported aromaticity models in current release of MayaChemTools: @SupportedModels\n";
  }
  if ($Options{atomidentifiertype} !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    die "Error: The value specified, $Options{atomidentifiertype}, for option \"-a, --AtomIdentifierType\" is not valid. Supported atom identifier types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes\n";
  }
  if ($Options{compoundidmode} !~ /^(DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i) {
    die "Error: The value specified, $Options{compoundidmode}, for option \"--CompoundIDMode\" is not valid. Allowed values: DataField, MolName, LabelPrefix or MolNameOrLabelPrefix\n";
  }
  if ($Options{datafieldsmode} !~ /^(All|Common|Specify|CompoundID)$/i) {
    die "Error: The value specified, $Options{datafieldsmode}, for option \"-d, --DataFieldsMode\" is not valid. Allowed values: All, Common, Specify or CompoundID\n";
  }
  if ($Options{filter} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{filter}, for option \"-f, --Filter\" is not valid. Allowed values: Yes or No\n";
  }
  if ($Options{keeplargestcomponent} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{keeplargestcomponent}, for option \"-k, --KeepLargestComponent\" is not valid. Allowed values: Yes or No\n";
  }
  if (!(IsInteger($Options{minneighborhoodradius}) && ($Options{minneighborhoodradius} >= 0))) {
    die "Error: The value specified, $Options{minneighborhoodradius}, for option \"--MinNeighborhoodRadius\" is not valid. Allowed values: >= 0 \n";
  }
  if (!(IsInteger($Options{maxneighborhoodradius}) && ($Options{maxneighborhoodradius} >= 0))) {
    die "Error: The value specified, $Options{maxneighborhoodradius}, for option \"--MaxNeighborhoodRadius\" is not valid. Allowed values: >= 0 \n";
  }
  # Equal values for minimum and maximum radius are allowed...
  if ($Options{minneighborhoodradius} > $Options{maxneighborhoodradius}) {
    die "Error: The value specified, $Options{minneighborhoodradius}, for option \"--MinNeighborhoodRadius\" must be less than or equal to the value specified, $Options{maxneighborhoodradius}, for option \"--MaxNeighborhoodRadius\" \n";
  }
  if ($Options{output} !~ /^(SD|FP|text|all)$/i) {
    die "Error: The value specified, $Options{output}, for option \"--output\" is not valid. Allowed values: SD, FP, text, or all\n";
  }
  if ($Options{outdelim} !~ /^(comma|semicolon|tab)$/i) {
    die "Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
  }
  if ($Options{quote} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: Yes or No\n";
  }
  # Fingerprints strings contain semicolons, so unquoted output can't use
  # semicolon as a delimiter...
  if ($Options{outdelim} =~ /^semicolon$/i && $Options{quote} =~ /^No$/i) {
    die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not allowed with, semicolon value of \"--outdelim\" option: Fingerprints string use semicolon as delimiter for various data fields and must be quoted.\n";
  }
}
737
738 __END__
739
740 =head1 NAME
741
742 AtomNeighborhoodsFingerprints.pl - Generate atom neighborhoods fingerprints for SD files
743
744 =head1 SYNOPSIS
745
746 AtomNeighborhoodsFingerprints.pl SDFile(s)...
747
748 AtomNeighborhoodsFingerprints.pl [B<--AromaticityModel> I<AromaticityModelType>]
[B<-a, --AtomIdentifierType> I<AtomicInvariantsAtomTypes |
DREIDINGAtomTypes | EStateAtomTypes | FunctionalClassAtomTypes | MMFF94AtomTypes | SLogPAtomTypes | SYBYLAtomTypes | TPSAAtomTypes | UFFAtomTypes>]
751 [B<--AtomicInvariantsToUse> I<"AtomicInvariant,AtomicInvariant...">]
752 [B<--FunctionalClassesToUse> I<"FunctionalClass1,FunctionalClass2...">]
753 [B<--CompoundID> I<DataFieldName or LabelPrefixString>] [B<--CompoundIDLabel> I<text>]
[B<--CompoundIDMode> I<DataField | MolName | LabelPrefix | MolNameOrLabelPrefix>] [B<--DataFields> I<"FieldLabel1,FieldLabel2,...">]
755 [B<-d, --DataFieldsMode> I<All | Common | Specify | CompoundID>] [B<-f, --Filter> I<Yes | No>]
756 [B<--FingerprintsLabel> I<text>] [B<-h, --help>] [B<-k, --KeepLargestComponent> I<Yes | No>]
757 [B<--MinNeighborhoodRadius> I<number>] [B<--MaxNeighborhoodRadius> I<number>]
758 [B<--OutDelim> I<comma | tab | semicolon>] [B<--output> I<SD | FP | text | all>] [B<-o, --overwrite>]
759 [B<-q, --quote> I<Yes | No>] [B<-r, --root> I<RootName>]
760 [B<-w, --WorkingDir> dirname] SDFile(s)...
761
762 =head1 DESCRIPTION
763
764 Generate atom neighborhoods fingerprints [ Ref 53-56, Ref 73 ] for I<SDFile(s)> and create appropriate
765 SD, FP or CSV/TSV text file(s) containing fingerprints vector strings corresponding to molecular fingerprints.
766
767 Multiple SDFile names are separated by spaces. The valid file extensions are I<.sdf>
768 and I<.sd>. All other file names are ignored. All the SD files in the current directory
769 can be specified either by I<*.sdf> or the current directory name.
770
771 The current release of MayaChemTools supports generation of atom neighborhoods fingerprints
772 corresponding to the following B<-a, --AtomIdentifierType> values:
773
774 AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
775 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
776 SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
777
778 Based on the values specified for B<-a, --AtomIdentifierType> and B<--AtomicInvariantsToUse>,
779 initial atom types are assigned to all non-hydrogen atoms in a molecule. Using atom neighborhoods
780 around each non-hydrogen central atom corresponding to radii between specified values
781 B<--MinNeighborhoodRadius> and B<--MaxNeighborhoodRadius>, unique atom types at
782 each radius level are counted and an atom neighborhood identifier is generated.
783
784 The format of an atom neighborhood identifier around a central non-hydrogen atom at a
785 specific radius is:
786
787 NR<n>-<AtomType>-ATC<n>
788
789 NR: Neighborhood radius
790 AtomType: Assigned atom type
791 ATC: Atom type count
792
793 The atom neighborhood identifier for a non-hydrogen central atom corresponding to all specified radii
794 is generated by concatenating neighborhood identifiers at each radius using colon as a delimiter:
795
796 NR<n>-<AtomType>-ATC<n>:NR<n>-<AtomType>-ATC<n>:...
797
798 The atom neighborhood identifiers for all non-hydrogen central atoms at all specified radii are
799 concatenated using space as a delimiter and constitute atom neighborhood fingerprint of the molecule.
800
801 Example of I<SD> file containing atom neighborhood fingerprints string data:
802
803 ... ...
804 ... ...
805 $$$$
806 ... ...
807 ... ...
808 ... ...
809 41 44 0 0 0 0 0 0 0 0999 V2000
810 -3.3652 1.4499 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
811 ... ...
812 2 3 1 0 0 0 0
813 ... ...
814 M END
815 > <CmpdID>
816 Cmpd1
817
818 > <AtomNeighborhoodsFingerprints>
819 FingerprintsVector;AtomNeighborhoods:AtomicInvariantsAtomTypes:MinRadiu
820 s0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-C.X1.BO1.H3-ATC1
821 :NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-ATC1 NR0-C.X1.B
822 O1.H3-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-ATC1
823 NR0-C.X2.BO2.H2-ATC1:NR1-C.X2.BO2.H2-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C...
824
825 $$$$
826 ... ...
827 ... ...
828
829 Example of I<FP> file containing atom neighborhood fingerprints string data:
830
831 #
832 # Package = MayaChemTools 7.4
833 # Release Date = Oct 21, 2010
834 #
835 # TimeStamp = Fri Mar 11 14:15:27 2011
836 #
837 # FingerprintsStringType = FingerprintsVector
838 #
839 # Description = AtomNeighborhoods:AtomicInvariantsAtomTypes:MinRadiu...
840 # VectorStringFormat = ValuesString
841 # VectorValuesType = AlphaNumericalValues
842 #
843 Cmpd1 41;NR0-C.X1.BO1.H3-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-A...
844 Cmpd2 23;NR0-C.X1.BO1.H3-ATC1:NR1-C.X2.BO2.H2-ATC1:NR2-C.X3.BO3.H1-A...
845 ... ...
846 ... ..
847
848 Example of CSV I<Text> file containing atom neighborhood fingerprints string data:
849
850 "CompoundID","AtomNeighborhoodsFingerprints"
851 "Cmpd1","FingerprintsVector;AtomNeighborhoods:AtomicInvariantsAtomTypes
852 :MinRadius0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-C.X1.B
853 O1.H3-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-ATC1
854 NR0-C.X1.BO1.H3-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3
855 .BO4-ATC1 NR0-C.X2.BO2.H2-ATC1:NR1-C.X2.BO2.H2-ATC1:NR1-C.X3.BO3.H1..."
856 ... ...
857 ... ...
858
859 The current release of MayaChemTools generates the following types of atom neighborhoods
860 fingerprints vector strings:
861
862 FingerprintsVector;AtomNeighborhoods:AtomicInvariantsAtomTypes:MinRadi
863 us0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-C.X1.BO1.H3-AT
864 C1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-ATC1 NR0-C.X
865 1.BO1.H3-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-A
866 TC1 NR0-C.X2.BO2.H2-ATC1:NR1-C.X2.BO2.H2-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2
867 -C.X2.BO2.H2-ATC1:NR2-N.X3.BO3-ATC1:NR2-O.X1.BO1.H1-ATC1 NR0-C.X2.B...
868
869 FingerprintsVector;AtomNeighborhoods:DREIDINGAtomTypes:MinRadius0:MaxR
870 adius2;41;AlphaNumericalValues;ValuesString;NR0-C_2-ATC1:NR1-C_3-ATC1:
871 NR1-O_2-ATC1:NR1-O_3-ATC1:NR2-C_3-ATC1 NR0-C_2-ATC1:NR1-C_R-ATC1:NR1-N
872 _3-ATC1:NR1-O_2-ATC1:NR2-C_R-ATC3 NR0-C_3-ATC1:NR1-C_2-ATC1:NR1-C_3-AT
873 C1:NR2-C_3-ATC1:NR2-O_2-ATC1:NR2-O_3-ATC2 NR0-C_3-ATC1:NR1-C_3-ATC1:NR
874 1-N_R-ATC1:NR2-C_3-ATC1:NR2-C_R-ATC2 NR0-C_3-ATC1:NR1-C_3-ATC1:NR2-...
875
876 FingerprintsVector;AtomNeighborhoods:EStateAtomTypes:MinRadius0:MaxRad
877 ius2;41;AlphaNumericalValues;ValuesString;NR0-aaCH-ATC1:NR1-aaCH-ATC1:
878 NR1-aasC-ATC1:NR2-aaCH-ATC1:NR2-aasC-ATC1:NR2-sF-ATC1 NR0-aaCH-ATC1:NR
879 1-aaCH-ATC1:NR1-aasC-ATC1:NR2-aaCH-ATC1:NR2-aasC-ATC1:NR2-sF-ATC1 NR0-
880 aaCH-ATC1:NR1-aaCH-ATC1:NR1-aasC-ATC1:NR2-aaCH-ATC1:NR2-aasC-ATC2 NR0-
881 aaCH-ATC1:NR1-aaCH-ATC1:NR1-aasC-ATC1:NR2-aaCH-ATC1:NR2-aasC-ATC2 N...
882
883 FingerprintsVector;AtomNeighborhoods:FunctionalClassAtomTypes:MinRadiu
884 s0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-Ar-ATC1:NR1-Ar-
885 ATC1:NR1-Ar.HBA-ATC1:NR1-None-ATC1:NR2-Ar-ATC2:NR2-None-ATC4 NR0-Ar-AT
886 C1:NR1-Ar-ATC2:NR1-Ar.HBA-ATC1:NR2-Ar-ATC5:NR2-None-ATC1 NR0-Ar-ATC1:N
887 R1-Ar-ATC2:NR1-HBD-ATC1:NR2-Ar-ATC2:NR2-None-ATC1 NR0-Ar-ATC1:NR1-Ar-A
888 TC2:NR1-Hal-ATC1:NR2-Ar-ATC2 NR0-Ar-ATC1:NR1-Ar-ATC2:NR1-None-ATC1:...
889
890 FingerprintsVector;AtomNeighborhoods:MMFF94AtomTypes:MinRadius0:MaxRad
891 ius2;41;AlphaNumericalValues;ValuesString;NR0-C5A-ATC1:NR1-C5B-ATC1:NR
892 1-CB-ATC1:NR1-N5-ATC1:NR2-C5A-ATC1:NR2-C5B-ATC1:NR2-CB-ATC3:NR2-CR-ATC
893 1 NR0-C5A-ATC1:NR1-C5B-ATC1:NR1-CR-ATC1:NR1-N5-ATC1:NR2-C5A-ATC1:NR2-C
894 5B-ATC1:NR2-C=ON-ATC1:NR2-CR-ATC3 NR0-C5B-ATC1:NR1-C5A-ATC1:NR1-C5B-AT
895 C1:NR1-C=ON-ATC1:NR2-C5A-ATC1:NR2-CB-ATC1:NR2-CR-ATC1:NR2-N5-ATC1:N...
896
897 FingerprintsVector;AtomNeighborhoods:SLogPAtomTypes:MinRadius0:MaxRadi
898 us2;41;AlphaNumericalValues;ValuesString;NR0-C1-ATC1:NR1-C10-ATC1:NR1-
899 CS-ATC1:NR2-C1-ATC1:NR2-N11-ATC1:NR2-O2-ATC1 NR0-C1-ATC1:NR1-C11-ATC1:
900 NR2-C1-ATC1:NR2-C21-ATC1 NR0-C1-ATC1:NR1-C11-ATC1:NR2-C1-ATC1:NR2-C21-
901 ATC1 NR0-C1-ATC1:NR1-C5-ATC1:NR1-CS-ATC1:NR2-C1-ATC1:NR2-O2-ATC2:NR2-O
902 9-ATC1 NR0-C1-ATC1:NR1-CS-ATC2:NR2-C1-ATC2:NR2-O2-ATC2 NR0-C10-ATC1...
903
904 FingerprintsVector;AtomNeighborhoods:SYBYLAtomTypes:MinRadius0:MaxRadi
905 us2;41;AlphaNumericalValues;ValuesString;NR0-C.2-ATC1:NR1-C.3-ATC1:NR1
906 -O.co2-ATC2:NR2-C.3-ATC1 NR0-C.2-ATC1:NR1-C.ar-ATC1:NR1-N.am-ATC1:NR1-
907 O.2-ATC1:NR2-C.ar-ATC3 NR0-C.3-ATC1:NR1-C.2-ATC1:NR1-C.3-ATC1:NR2-C.3-
908 ATC1:NR2-O.3-ATC1:NR2-O.co2-ATC2 NR0-C.3-ATC1:NR1-C.3-ATC1:NR1-N.ar-AT
909 C1:NR2-C.3-ATC1:NR2-C.ar-ATC2 NR0-C.3-ATC1:NR1-C.3-ATC1:NR2-C.3-ATC...
910
911 FingerprintsVector;AtomNeighborhoods:TPSAAtomTypes:MinRadius0:MaxRadiu
912 s2;41;AlphaNumericalValues;ValuesString;NR0-N21-ATC1:NR1-None-ATC3:NR2
913 -None-ATC5 NR0-N7-ATC1:NR1-None-ATC2:NR2-None-ATC3:NR2-O3-ATC1 NR0-Non
914 e-ATC1:NR1-N21-ATC1:NR1-None-ATC1:NR2-None-ATC3 NR0-None-ATC1:NR1-N21-
915 ATC1:NR1-None-ATC2:NR2-None-ATC6 NR0-None-ATC1:NR1-N21-ATC1:NR1-None-A
916 TC2:NR2-None-ATC6 NR0-None-ATC1:NR1-N7-ATC1:NR1-None-ATC1:NR1-O3-AT...
917
918 FingerprintsVector;AtomNeighborhoods:UFFAtomTypes:MinRadius0:MaxRadius
919 2;41;AlphaNumericalValues;ValuesString;NR0-C_2-ATC1:NR1-C_3-ATC1:NR1-O
920 _2-ATC1:NR1-O_3-ATC1:NR2-C_3-ATC1 NR0-C_2-ATC1:NR1-C_R-ATC1:NR1-N_3-AT
921 C1:NR1-O_2-ATC1:NR2-C_R-ATC3 NR0-C_3-ATC1:NR1-C_2-ATC1:NR1-C_3-ATC1:NR
922 2-C_3-ATC1:NR2-O_2-ATC1:NR2-O_3-ATC2 NR0-C_3-ATC1:NR1-C_3-ATC1:NR1-N_R
923 -ATC1:NR2-C_3-ATC1:NR2-C_R-ATC2 NR0-C_3-ATC1:NR1-C_3-ATC1:NR2-C_3-A...
924
925 =head1 OPTIONS
926
927 =over 4
928
929 =item B<--AromaticityModel> I<MDLAromaticityModel | TriposAromaticityModel | MMFFAromaticityModel | ChemAxonBasicAromaticityModel | ChemAxonGeneralAromaticityModel | DaylightAromaticityModel | MayaChemToolsAromaticityModel>
930
931 Specify aromaticity model to use during detection of aromaticity. Possible values in the current
932 release are: I<MDLAromaticityModel, TriposAromaticityModel, MMFFAromaticityModel,
933 ChemAxonBasicAromaticityModel, ChemAxonGeneralAromaticityModel, DaylightAromaticityModel
934 or MayaChemToolsAromaticityModel>. Default value: I<MayaChemToolsAromaticityModel>.
935
936 The supported aromaticity model names along with model specific control parameters
937 are defined in B<AromaticityModelsData.csv>, which is distributed with the current release
938 and is available under B<lib/data> directory. B<Molecule.pm> module retrieves data from
939 this file during class instantiation and makes it available to method B<DetectAromaticity>
940 for detecting aromaticity corresponding to a specific model.
941
942 =item B<-a, --AtomIdentifierType> I<AtomicInvariantsAtomTypes | DREIDINGAtomTypes | EStateAtomTypes | FunctionalClassAtomTypes | MMFF94AtomTypes | SLogPAtomTypes | SYBYLAtomTypes | TPSAAtomTypes | UFFAtomTypes>
943
944 Specify atom identifier type to use for assignment of initial atom identifier to non-hydrogen
945 atoms during calculation of atom neighborhoods fingerprints. Possible values in the current
946 release are: I<AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
947 FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
948 TPSAAtomTypes, UFFAtomTypes>. Default value: I<AtomicInvariantsAtomTypes>.
949
950 =item B<--AtomicInvariantsToUse> I<"AtomicInvariant,AtomicInvariant...">
951
952 This value is used during I<AtomicInvariantsAtomTypes> value of B<-a, --AtomIdentifierType>
953 option. It's a list of comma separated valid atomic invariant atom types.
954
955 Possible values for atomic invariants are: I<AS, X, BO, LBO, SB, DB, TB,
956 H, Ar, RA, FC, MN, SM>. Default value: I<AS,X,BO,H,FC>.
957
958 The atomic invariants abbreviations correspond to:
959
960 AS = Atom symbol corresponding to element symbol
961
962 X<n> = Number of non-hydrogen atom neighbors or heavy atoms
963 BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms
964 LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms
965 SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms
966 DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms
967 TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms
968 H<n> = Number of implicit and explicit hydrogens for atom
969 Ar = Aromatic annotation indicating whether atom is aromatic
970 RA = Ring atom annotation indicating whether atom is in a ring
971 FC<+n/-n> = Formal charge assigned to atom
972 MN<n> = Mass number indicating isotope other than most abundant isotope
973 SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or
974 3 (triplet)
975
976 Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
977
978 AS.X<n>.BO<n>.LBO<n>.SB<n>.DB<n>.TB<n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
979
980 Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
981 optional. Atom type specification doesn't include atomic invariants with zero or undefined values.
982
983 In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words
984 are also allowed:
985
986 X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors
987 BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms
988 LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms
989 SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms
990 DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms
991 TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms
992 H : NumOfImplicitAndExplicitHydrogens
993 Ar : Aromatic
994 RA : RingAtom
995 FC : FormalCharge
996 MN : MassNumber
997 SM : SpinMultiplicity
998
999 I<AtomTypes::AtomicInvariantsAtomTypes> module is used to assign atomic invariant
1000 atom types.
1001
1002 =item B<--FunctionalClassesToUse> I<"FunctionalClass1,FunctionalClass2...">
1003
1004 This value is used during I<FunctionalClassAtomTypes> value of B<-a, --AtomIdentifierType>
1005 option. It's a list of comma separated valid functional classes.
1006
1007 Possible values for atom functional classes are: I<Ar, CA, H, HBA, HBD, Hal, NI, PI, RA>.
1008 Default value [ Ref 24 ]: I<HBD,HBA,PI,NI,Ar,Hal>.
1009
1010 The functional class abbreviations correspond to:
1011
1012 HBD: HydrogenBondDonor
1013 HBA: HydrogenBondAcceptor
1014 PI : PositivelyIonizable
1015 NI : NegativelyIonizable
1016 Ar : Aromatic
1017 Hal : Halogen
1018 H : Hydrophobic
1019 RA : RingAtom
1020 CA : ChainAtom
1021
1022 Functional class atom type specification for an atom corresponds to:
1023
1024 Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
1025
1026 I<AtomTypes::FunctionalClassAtomTypes> module is used to assign functional class atom
1027 types. It uses following definitions [ Ref 60-61, Ref 65-66 ]:
1028
1029 HydrogenBondDonor: NH, NH2, OH
1030 HydrogenBondAcceptor: N[!H], O
1031 PositivelyIonizable: +, NH2
1032 NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
1033
1034 =item B<--CompoundID> I<DataFieldName or LabelPrefixString>
1035
1036 This value is B<--CompoundIDMode> specific and indicates how compound ID is generated.
1037
1038 For I<DataField> value of B<--CompoundIDMode> option, it corresponds to datafield label name
1039 whose value is used as compound ID; otherwise, it's a prefix string used for generating compound
1040 IDs like LabelPrefixString<Number>. Default value, I<Cmpd>, generates compound IDs which
1041 look like Cmpd<Number>.
1042
1043 Examples for I<DataField> value of B<--CompoundIDMode>:
1044
1045 MolID
1046 ExtReg
1047
1048 Examples for I<LabelPrefix> or I<MolNameOrLabelPrefix> value of B<--CompoundIDMode>:
1049
1050 Compound
1051
1052 The value specified above generates compound IDs which correspond to Compound<Number>
1053 instead of default value of Cmpd<Number>.
1054
1055
1056 =item B<--CompoundIDLabel> I<text>
1057
1058 Specify compound ID column label for FP or CSV/TSV text file(s) used during I<CompoundID> value
1059 of B<--DataFieldsMode> option. Default: I<CompoundID>.
1060
1061 =item B<--CompoundIDMode> I<DataField | MolName | LabelPrefix | MolNameOrLabelPrefix>
1062
1063 Specify how to generate compound IDs and write to FP or CSV/TSV text file(s) along with generated
1064 fingerprints for I<FP | text | all> values of B<--output> option: use a I<SDFile(s)> datafield value;
1065 use molname line from I<SDFile(s)>; generate a sequential ID with specific prefix; use combination
1066 of both MolName and LabelPrefix with usage of LabelPrefix values for empty molname lines.
1067
1068 Possible values: I<DataField | MolName | LabelPrefix | MolNameOrLabelPrefix>.
1069 Default: I<LabelPrefix>.
1070
1071 For I<MolNameOrLabelPrefix> value of B<--CompoundIDMode>, molname line in I<SDFile(s)> takes
1072 precedence over sequential compound IDs generated using I<LabelPrefix> and only empty molname
1073 values are replaced with sequential compound IDs.
1074
1075 This is only used for I<CompoundID> value of B<--DataFieldsMode> option.
1076
1077 =item B<--DataFields> I<"FieldLabel1,FieldLabel2,...">
1078
1079 Comma delimited list of I<SDFile(s)> data fields to extract and write to CSV/TSV text file(s) along
1080 with generated fingerprints for I<text | all> values of B<--output> option.
1081
1082 This is only used for I<Specify> value of B<--DataFieldsMode> option.
1083
1084 Examples:
1085
1086 Extreg
1087 MolID,CompoundName
1088
1089 =item B<-d, --DataFieldsMode> I<All | Common | Specify | CompoundID>
1090
1091 Specify how data fields in I<SDFile(s)> are transferred to output CSV/TSV text file(s) along
1092 with generated fingerprints for I<text | all> values of B<--output> option: transfer all SD
1093 data fields; transfer SD data fields common to all compounds; extract specified data fields;
1094 generate a compound ID using molname line, a compound prefix, or a combination of both.
1095 Possible values: I<All | Common | Specify | CompoundID>. Default value: I<CompoundID>.
1096
1097 =item B<-f, --Filter> I<Yes | No>
1098
1099 Specify whether to check and filter compound data in SDFile(s). Possible values: I<Yes or No>.
1100 Default value: I<Yes>.
1101
1102 By default, compound data is checked before calculating fingerprints and compounds containing
1103 atom data corresponding to non-element symbols or no atom data are ignored.
1104
1105 =item B<--FingerprintsLabel> I<text>
1106
1107 SD data label or text file column label to use for fingerprints string in output SD or
1108 CSV/TSV text file(s) specified by B<--output>. Default value: I<AtomNeighborhoodsFingerprints>.
1109
1110 =item B<-h, --help>
1111
1112 Print this help message.
1113
1114 =item B<-k, --KeepLargestComponent> I<Yes | No>
1115
1116 Generate fingerprints for only the largest component in molecule. Possible values:
1117 I<Yes or No>. Default value: I<Yes>.
1118
1119 For molecules containing multiple connected components, fingerprints can be generated
1120 in two different ways: use all connected components or just the largest connected
1121 component. By default, all atoms except for the largest connected component are
1122 deleted before generation of fingerprints.
1123
1124 =item B<--MinNeighborhoodRadius> I<number>
1125
1126 Minimum atom neighborhood radius for generating atom neighborhoods. Default value: I<0>.
1127 Valid values: non-negative integers less than B<--MaxNeighborhoodRadius>. Neighborhood
1128 radius of zero corresponds to list of non-hydrogen atoms.
1129
1130 =item B<--MaxNeighborhoodRadius> I<number>
1131
1132 Maximum atom neighborhood radius for generating atom neighborhoods. Default value: I<2>.
1133 Valid values: positive integers and greater than B<--MinNeighborhoodRadius>.
1134
1135 =item B<--OutDelim> I<comma | tab | semicolon>
1136
1137 Delimiter for output CSV/TSV text file(s). Possible values: I<comma, tab, or semicolon>.
1138 Default value: I<comma>.
1139
1140 =item B<--output> I<SD | FP | text | all>
1141
1142 Type of output files to generate. Possible values: I<SD, FP, text, or all>. Default value: I<text>.
1143
1144 =item B<-o, --overwrite>
1145
1146 Overwrite existing files.
1147
1148 =item B<-q, --quote> I<Yes | No>
1149
1150 Put quotes around column values in output CSV/TSV text file(s). Possible values:
1151 I<Yes or No>. Default value: I<Yes>.
1152
1153 =item B<-r, --root> I<RootName>
1154
1155 New file name is generated using the root: <Root>.<Ext>. Default for new file names:
1156 <SDFileName><AtomNeighborhoodsFP>.<Ext>. The file type determines <Ext>
1157 value. The sdf, fpf, csv, and tsv <Ext> values are used for SD, FP, comma/semicolon, and tab
1158 delimited text files, respectively. This option is ignored for multiple input files.
1159
1160 =item B<-w, --WorkingDir> I<DirName>
1161
1162 Location of working directory. Default: current directory.
1163
1164 =back
1165
1166 =head1 EXAMPLES
1167
1168 To generate atom neighborhoods fingerprints corresponding to atom neighborhood radii from 0 to
1169 2 using atomic invariants atom types in vector string format and create a SampleANFP.csv
1170 file containing sequential compound IDs along with fingerprints vector strings data, type:
1171
1172 % AtomNeighborhoodsFingerprints.pl -r SampleANFP -o Sample.sdf
1173
1174 To generate atom neighborhoods fingerprints corresponding to atom neighborhood radii from 0 to
1175 2 using DREIDING atom types in vector string format and create a SampleANFP.csv
1176 file containing sequential compound IDs along with fingerprints vector strings data, type:
1177
1178 % AtomNeighborhoodsFingerprints.pl -a DREIDINGAtomTypes -r SampleANFP
1179 -o Sample.sdf
1180
1181 To generate atom neighborhoods fingerprints corresponding to atom neighborhood radii from 0 to
1182 2 using EState atom types in vector string format and create a SampleANFP.csv
1183 file containing sequential compound IDs along with fingerprints vector strings data, type:
1184
1185 % AtomNeighborhoodsFingerprints.pl -a EStateAtomTypes -r SampleANFP
1186 -o Sample.sdf
1187
1188 To generate atom neighborhoods fingerprints corresponding to atom neighborhood radii from 0 to
1189 2 using SYBYL atom types in vector string format and create a SampleANFP.csv
1190 file containing sequential compound IDs along with fingerprints vector strings data, type:
1191
1192 % AtomNeighborhoodsFingerprints.pl -a SYBYLAtomTypes -r SampleANFP
1193 -o Sample.sdf
1194
1195 To generate atom neighborhoods fingerprints corresponding to atom neighborhood radii from 0 to
1196 2 using FunctionalClass atom types in vector string format and create a SampleANFP.csv
1197 file containing sequential compound IDs along with fingerprints vector strings data, type:
1198
1199 % AtomNeighborhoodsFingerprints.pl -a FunctionalClassAtomTypes
1200 -r SampleANFP -o Sample.sdf
1201
1202 To generate atom neighborhoods fingerprints corresponding to atom neighborhood radii from 0 to
1203 2 using MMFF94 atom types in vector string format and create a SampleANFP.csv
1204 file containing sequential compound IDs along with fingerprints vector strings data, type:
1205
1206 % AtomNeighborhoodsFingerprints.pl -a MMFF94AtomTypes -r SampleANFP
1207 -o Sample.sdf
1208
1209 To generate atom neighborhoods fingerprints corresponding to atom neighborhood radii from 0 to
1210 2 using SLogP atom types in vector string format and create a SampleANFP.csv
1211 file containing sequential compound IDs along with fingerprints vector strings data, type:
1212
1213 % AtomNeighborhoodsFingerprints.pl -a SLogPAtomTypes -r SampleANFP
1214 -o Sample.sdf
1215
1216 To generate atom neighborhoods fingerprints corresponding to atom neighborhood radii from 0 to
1217 2 using SYBYL atom types in vector string format and create a SampleANFP.csv
1218 file containing sequential compound IDs along with fingerprints vector strings data, type:
1219
1220 % AtomNeighborhoodsFingerprints.pl -a SYBYLAtomTypes -r SampleANFP
1221 -o Sample.sdf
1222
1223 To generate atom neighborhoods fingerprints corresponding to atom neighborhood radii from 0 to
1224 2 using TPSA atom types in vector string format and create a SampleANFP.csv
1225 file containing sequential compound IDs along with fingerprints vector strings data, type:
1226
1227 % AtomNeighborhoodsFingerprints.pl -a TPSAAtomTypes -r SampleANFP
1228 -o Sample.sdf
1229
1230 To generate atom neighborhoods fingerprints corresponding to atom neighborhood radii from 0 to
1231 2 using UFF atom types in vector string format and create a SampleANFP.csv
1232 file containing sequential compound IDs along with fingerprints vector strings data, type:
1233
1234 % AtomNeighborhoodsFingerprints.pl -a UFFAtomTypes -r SampleANFP
1235 -o Sample.sdf
1236
1237 To generate atom neighborhoods fingerprints corresponding to atom neighborhood radii from 0 to
1238 2 using atomic invariants atom types in vector string format and create SampleANFP.sdf,
1239 SampleANFP.fpf and SampleANFP.csv files containing sequential compound IDs in CSV file along
1240 with fingerprints vector strings data, type:
1241
1242 % AtomNeighborhoodsFingerprints.pl --output all -r SampleANFP
1243 -o Sample.sdf
1244
1245 To generate atom neighborhoods fingerprints corresponding to atom neighborhood radii from 1 to
1246 3 using atomic invariants atom types in vector string format and create a SampleANFP.csv
1247 file containing sequential compound IDs along with fingerprints vector strings data, type:
1248
1249 % AtomNeighborhoodsFingerprints.pl -a AtomicInvariantsAtomTypes
1250 --MinNeighborhoodRadius 1 --MaxNeighborhoodRadius 3 -r SampleANFP
1251 -o Sample.sdf
1252
1253 To generate atom neighborhoods fingerprints corresponding to atom neighborhood radii from 0 to
1254 3 using only AS,X atomic invariants atom types in vector string format and create a SampleANFP.csv
1255 file containing sequential compound IDs along with fingerprints vector strings data, type:
1256
1257 % AtomNeighborhoodsFingerprints.pl -a AtomicInvariantsAtomTypes
1258 --AtomicInvariantsToUse "AS,X" --MinNeighborhoodRadius 0
1259 --MaxNeighborhoodRadius 3 -r SampleANFP -o Sample.sdf
1260
1261 To generate atom neighborhoods fingerprints corresponding to atom neighborhood radii from 0 to
1262 2 using atomic invariants atom types in vector string format and create a SampleANFP.csv
1263 file containing compound ID from molecule name line along with fingerprints vector strings data, type:
1264
1265 % AtomNeighborhoodsFingerprints.pl -a AtomicInvariantsAtomTypes
1266 --DataFieldsMode CompoundID --CompoundIDMode MolName
1267 -r SampleANFP -o Sample.sdf
1268
1269 To generate atom neighborhoods fingerprints corresponding to atom neighborhood radii from 0 to
1270 2 using atomic invariants atom types in vector string format and create a SampleANFP.csv
1271 file containing compound IDs using specified data field along with fingerprints vector strings
1272 data, type:
1273
1274 % AtomNeighborhoodsFingerprints.pl -a AtomicInvariantsAtomTypes
1275 --DataFieldsMode CompoundID --CompoundIDMode DataField --CompoundID
1276 Mol_ID -r SampleANFP -o Sample.sdf
1277
1278 To generate atom neighborhoods fingerprints corresponding to atom neighborhood radii from 0 to
1279 2 using atomic invariants atom types in vector string format and create a SampleANFP.csv
1280 file containing compound ID using combination of molecule name line and an explicit compound
1281 prefix along with fingerprints vector strings data, type:
1282
1283 % AtomNeighborhoodsFingerprints.pl -a AtomicInvariantsAtomTypes
1284 --DataFieldsMode CompoundID --CompoundIDMode MolnameOrLabelPrefix
1285 --CompoundID Cmpd --CompoundIDLabel MolID -r SampleANFP -o Sample.sdf
1286
1287 To generate atom neighborhoods fingerprints corresponding to atom neighborhood radii from 0 to
1288 2 using atomic invariants atom types in vector string format and create a SampleANFP.csv
1289 file containing specific data fields columns along with fingerprints vector strings
1290 data, type:
1291
1292 % AtomNeighborhoodsFingerprints.pl -a AtomicInvariantsAtomTypes
1293 --DataFieldsMode Specify --DataFields Mol_ID -r SampleANFP
1294 -o Sample.sdf
1295
1296 To generate atom neighborhoods fingerprints corresponding to atom neighborhood radii from 0 to
1297 2 using atomic invariants atom types in vector string format and create a SampleANFP.csv
1298 file containing common data fields columns along with fingerprints vector strings
1299 data, type:
1300
1301 % AtomNeighborhoodsFingerprints.pl -a AtomicInvariantsAtomTypes
1302 --DataFieldsMode Common -r SampleANFP -o Sample.sdf
1303
1304 To generate atom neighborhoods fingerprints corresponding to atom neighborhood radii from 0 to
1305 2 using atomic invariants atom types in vector string format and create SampleANFP.sdf,
1306 SampleANFP.fpf and SampleANFP.csv files containing all data fields columns in CSV file along with
1307 fingerprints data, type:
1308
1309 % AtomNeighborhoodsFingerprints.pl -a AtomicInvariantsAtomTypes
1310 --DataFieldsMode All --output all -r SampleANFP
1311 -o Sample.sdf
1312
1313 =head1 AUTHOR
1314
1315 Manish Sud <msud@san.rr.com>
1316
1317 =head1 SEE ALSO
1318
1319 InfoFingerprintsFiles.pl, SimilarityMatricesFingerprints.pl, SimilaritySearchingFingerprints.pl,
1320 ExtendedConnectivityFingerprints.pl, MACCSKeysFingerprints.pl, PathLengthFingerprints.pl,
1321 TopologicalAtomPairsFingerprints.pl, TopologicalAtomTorsionsFingerprints.pl,
1322 TopologicalPharmacophoreAtomPairsFingerprints.pl, TopologicalPharmacophoreAtomTripletsFingerprints.pl
1323
1324 =head1 COPYRIGHT
1325
1326 Copyright (C) 2015 Manish Sud. All rights reserved.
1327
1328 This file is part of MayaChemTools.
1329
1330 MayaChemTools is free software; you can redistribute it and/or modify it under
1331 the terms of the GNU Lesser General Public License as published by the Free
1332 Software Foundation; either version 3 of the License, or (at your option)
1333 any later version.
1334
1335 =cut