Mercurial > repos > deepakjadmin > r_caret_test
comparison mayachemtool/mayachemtools/bin/ElementalAnalysisSDFiles.pl @ 0:68300206e90d draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Thu, 05 Nov 2015 02:41:30 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:68300206e90d |
---|---|
1 #!/usr/bin/perl -w | |
2 # | |
3 # $RCSfile: ElementalAnalysisSDFiles.pl,v $ | |
4 # $Date: 2015/02/28 20:46:19 $ | |
5 # $Revision: 1.24 $ | |
6 # | |
7 # Author: Manish Sud <msud@san.rr.com> | |
8 # | |
9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
10 # | |
11 # This file is part of MayaChemTools. | |
12 # | |
13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
14 # the terms of the GNU Lesser General Public License as published by the Free | |
15 # Software Foundation; either version 3 of the License, or (at your option) any | |
16 # later version. | |
17 # | |
18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
19 # any warranty; without even the implied warranty of merchantability of fitness | |
20 # for a particular purpose. See the GNU Lesser General Public License for more | |
21 # details. | |
22 # | |
23 # You should have received a copy of the GNU Lesser General Public License | |
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
26 # Boston, MA, 02111-1307, USA. | |
27 # | |
28 | |
29 use strict; | |
30 use FindBin; use lib "$FindBin::Bin/../lib"; | |
31 use Getopt::Long; | |
32 use File::Basename; | |
33 use Text::ParseWords; | |
34 use Benchmark; | |
35 use FileUtil; | |
36 use SDFileUtil; | |
37 use TextUtil; | |
38 use MolecularFormula; | |
39 use FileIO::SDFileIO; | |
40 use Molecule; | |
41 | |
42 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime); | |
43 | |
44 # Autoflush STDOUT | |
45 $| = 1; | |
46 | |
47 # Starting message... | |
48 $ScriptName = basename($0); | |
49 print "\n$ScriptName: Starting...\n\n"; | |
50 $StartTime = new Benchmark; | |
51 | |
52 # Get the options and setup script... | |
53 SetupScriptUsage(); | |
54 if ($Options{help} || @ARGV < 1) { | |
55 die GetUsageFromPod("$FindBin::Bin/$ScriptName"); | |
56 } | |
57 | |
58 my(@SDFilesList); | |
59 @SDFilesList = ExpandFileNames(\@ARGV, "sdf sd"); | |
60 | |
61 print "Processing options...\n"; | |
62 my(%OptionsInfo); | |
63 ProcessOptions(); | |
64 | |
65 print "Checking input SD file(s)...\n"; | |
66 my(%SDFilesInfo); | |
67 RetrieveSDFilesInfo(); | |
68 | |
69 # Generate output files... | |
70 my($FileIndex); | |
71 if (@SDFilesList > 1) { | |
72 print "\nProcessing SD files...\n"; | |
73 } | |
74 for $FileIndex (0 .. $#SDFilesList) { | |
75 if ($SDFilesInfo{FileOkay}[$FileIndex]) { | |
76 print "\nProcessing file $SDFilesList[$FileIndex]...\n"; | |
77 PerformElementalAnalysis($FileIndex); | |
78 } | |
79 } | |
80 print "\n$ScriptName:Done...\n\n"; | |
81 | |
82 $EndTime = new Benchmark; | |
83 $TotalTime = timediff ($EndTime, $StartTime); | |
84 print "Total time: ", timestr($TotalTime), "\n"; | |
85 | |
86 ############################################################################### | |
87 | |
88 # Perform analysis... | |
89 sub PerformElementalAnalysis { | |
90 my($Index) = @_; | |
91 my($SDFile, $NewSDFile, $KeyDataFieldName, $CmpdCount, $CurrentFormula, $FormulaFieldName, $CmpdString, $Value, $CalculationType, $CalculatedValue, $ErrorMsg, $Status, $ElementsRef, $ElementCompositionRef, $Molecule, @CalculatedValues, @CmpdLines, %DataFieldValuesMap); | |
92 | |
93 $SDFile = $SDFilesList[$Index]; | |
94 $NewSDFile = $SDFilesInfo{OutFile}[$Index]; | |
95 | |
96 print "Generating new SD file $NewSDFile...\n"; | |
97 open NEWSDFILE, ">$NewSDFile" or die "Error: Couldn't open $NewSDFile: $! \n"; | |
98 open SDFILE, "$SDFile" or die "Error: Can't open $SDFile: $! \n"; | |
99 | |
100 | |
101 $CmpdCount = 0; | |
102 $FormulaFieldName = $SDFilesInfo{FormulaFieldName}[$Index]; | |
103 | |
104 COMPOUND: while ($CmpdString = ReadCmpdString(\*SDFILE)) { | |
105 $CmpdCount++; | |
106 @CmpdLines = split "\n", $CmpdString; | |
107 %DataFieldValuesMap = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines); | |
108 | |
109 @CalculatedValues = (); | |
110 for $Value (@{$OptionsInfo{SpecifiedCalculations}}) { | |
111 push @CalculatedValues, ''; | |
112 } | |
113 | |
114 $CurrentFormula = undef; | |
115 | |
116 if ($OptionsInfo{UseStructureData}) { | |
117 $Molecule = FileIO::SDFileIO::ParseMoleculeString($CmpdString); | |
118 $CurrentFormula = $Molecule->GetMolecularFormula(); | |
119 } | |
120 else { | |
121 if (!exists $DataFieldValuesMap{$FormulaFieldName}) { | |
122 $ErrorMsg = "Ignoring compound record $CmpdCount: Formula field $FormulaFieldName not found"; | |
123 PrintErrorMsg($CmpdString, $ErrorMsg); | |
124 WriteNewCompoundRecord($Index, \*NEWSDFILE, \@CmpdLines, \@CalculatedValues); | |
125 next COMPOUND; | |
126 } | |
127 | |
128 # Make sure it's a valid molecular formula... | |
129 $CurrentFormula = $DataFieldValuesMap{$FormulaFieldName}; | |
130 if ($OptionsInfo{CheckFormula}) { | |
131 ($Status, $ErrorMsg) = MolecularFormula::IsMolecularFormula($CurrentFormula); | |
132 if (!$Status) { | |
133 $ErrorMsg = "Ignoring compound record $CmpdCount: Formula field value $CurrentFormula is not valid: $ErrorMsg"; | |
134 PrintErrorMsg($CmpdString, $ErrorMsg); | |
135 WriteNewCompoundRecord($Index, \*NEWSDFILE, \@CmpdLines, \@CalculatedValues); | |
136 next COMPOUND; | |
137 } | |
138 } | |
139 } | |
140 | |
141 # Calculate appropriate values and write 'em out... | |
142 @CalculatedValues = (); | |
143 for $CalculationType (@{$OptionsInfo{SpecifiedCalculations}}) { | |
144 if ($CalculationType =~ /^ElementalAnalysis$/i) { | |
145 ($ElementsRef, $ElementCompositionRef) = MolecularFormula::CalculateElementalComposition($CurrentFormula); | |
146 $CalculatedValue = (defined($ElementsRef) && defined($ElementCompositionRef)) ? MolecularFormula::FormatCompositionInfomation($ElementsRef, $ElementCompositionRef, $OptionsInfo{Precision}) : ''; | |
147 } | |
148 elsif ($CalculationType =~ /^MolecularWeight$/i) { | |
149 $CalculatedValue = MolecularFormula::CalculateMolecularWeight($CurrentFormula); | |
150 $CalculatedValue = (defined($CalculatedValue) && length($CalculatedValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $CalculatedValue)) : ""; | |
151 } | |
152 elsif ($CalculationType =~ /^ExactMass$/i) { | |
153 $CalculatedValue = MolecularFormula::CalculateExactMass($CurrentFormula); | |
154 $CalculatedValue = (defined($CalculatedValue) && length($CalculatedValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $CalculatedValue)) : ""; | |
155 } | |
156 else { | |
157 $CalculatedValue = ''; | |
158 } | |
159 push @CalculatedValues, $CalculatedValue; | |
160 } | |
161 WriteNewCompoundRecord($Index, \*NEWSDFILE, \@CmpdLines, \@CalculatedValues, $CurrentFormula); | |
162 } | |
163 close NEWSDFILE; | |
164 close SDFILE; | |
165 } | |
166 | |
167 # Write out compound record with calculated values... | |
168 sub WriteNewCompoundRecord { | |
169 my($Index, $SDFileRef, $CmpdLinesRef, $CalculatedValuesRef, $MolecularFormula) = @_; | |
170 | |
171 # Write out compound lines except the last line which contains $$$$... | |
172 my($LineIndex); | |
173 for $LineIndex (0 .. ($#{$CmpdLinesRef} - 1)) { | |
174 print $SDFileRef "$CmpdLinesRef->[$LineIndex]\n"; | |
175 } | |
176 | |
177 # Write out calculated values... | |
178 my($CalcIndex, $FieldName, $FieldValue); | |
179 for $CalcIndex (0 .. $#{$OptionsInfo{SpecifiedCalculations}}) { | |
180 $FieldName = $SDFilesInfo{ValueFieldNamesMap}[$Index]{$OptionsInfo{SpecifiedCalculations}[$CalcIndex]}; | |
181 $FieldValue = $CalculatedValuesRef->[$CalcIndex]; | |
182 print $SDFileRef "> <$FieldName>\n$FieldValue\n\n"; | |
183 } | |
184 | |
185 if ($OptionsInfo{UseStructureData} && $OptionsInfo{WriteOutFormula} && defined($MolecularFormula)) { | |
186 $FieldName = $SDFilesInfo{ValueFieldNamesMap}[$Index]{MolecularFormula}; | |
187 $FieldValue = $MolecularFormula; | |
188 print $SDFileRef "> <$FieldName>\n$FieldValue\n\n"; | |
189 } | |
190 | |
191 print $SDFileRef "\$\$\$\$\n"; | |
192 } | |
193 | |
194 # Print out error message... | |
195 sub PrintErrorMsg { | |
196 my($CmpdString, $ErrorMsg) = @_; | |
197 | |
198 if ($OptionsInfo{DetailLevel} >= 2 ) { | |
199 print "$ErrorMsg:\n$CmpdString\n"; | |
200 } | |
201 elsif ($OptionsInfo{DetailLevel} >= 1) { | |
202 print "$ErrorMsg\n"; | |
203 } | |
204 } | |
205 | |
206 # Retrieve information about input SD files... | |
207 sub RetrieveSDFilesInfo { | |
208 my($Index, $SDFile, $FileDir, $FileName, $FileExt, $OutFileRoot, $OutFile, $FormulaFieldName, $Value, $FieldName, $NewFieldName, $Count); | |
209 | |
210 my(%NewValueFieldNames) = (ElementalAnalysis => 'ElementalAnalysis', MolecularWeight => 'MolecularWeight', ExactMass => 'ExactMass', MolecularFormula => 'MolecularFormula'); | |
211 if (@{$OptionsInfo{SpecifiedValueFieldNames}}) { | |
212 for ($Index = 0; $Index < @{$OptionsInfo{SpecifiedValueFieldNames}}; $Index +=2) { | |
213 $Value = $OptionsInfo{SpecifiedValueFieldNames}[$Index]; | |
214 $FieldName = $OptionsInfo{SpecifiedValueFieldNames}[$Index + 1]; | |
215 if (exists $NewValueFieldNames{$Value}) { | |
216 $NewValueFieldNames{$Value} = $FieldName; | |
217 } | |
218 } | |
219 } | |
220 | |
221 %SDFilesInfo = (); | |
222 | |
223 @{$SDFilesInfo{FileOkay}} = (); | |
224 @{$SDFilesInfo{OutFile}} = (); | |
225 @{$SDFilesInfo{FormulaFieldName}} = (); | |
226 @{$SDFilesInfo{ValueFieldNamesMap}} = (); | |
227 | |
228 FILELIST: for $Index (0 .. $#SDFilesList) { | |
229 $SDFile = $SDFilesList[$Index]; | |
230 | |
231 $SDFilesInfo{FileOkay}[$Index] = 0; | |
232 $SDFilesInfo{OutFile}[$Index] = ''; | |
233 $SDFilesInfo{FormulaFieldName}[$Index] = ''; | |
234 | |
235 %{$SDFilesInfo{ValueFieldNamesMap}[$Index]} = (); | |
236 | |
237 if (!(-e $SDFile)) { | |
238 warn "Warning: Ignoring file $SDFile: It doesn't exist\n"; | |
239 next FILELIST; | |
240 } | |
241 if (!CheckFileType($SDFile, "sd sdf")) { | |
242 warn "Warning: Ignoring file $SDFile: It's not a SD file\n"; | |
243 next FILELIST; | |
244 } | |
245 $FileDir = ""; $FileName = ""; $FileExt = ""; | |
246 ($FileDir, $FileName, $FileExt) = ParseFileName($SDFile); | |
247 if ($Options{root} && (@SDFilesList == 1)) { | |
248 my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($Options{root}); | |
249 if ($RootFileName && $RootFileExt) { | |
250 $FileName = $RootFileName; | |
251 } | |
252 else { | |
253 $FileName = $Options{root}; | |
254 } | |
255 $OutFileRoot = $FileName; | |
256 } | |
257 else { | |
258 $OutFileRoot = $FileName . "ElementalAnalysis"; | |
259 } | |
260 | |
261 $OutFile = $OutFileRoot . ".$FileExt"; | |
262 if (lc($OutFile) eq lc($SDFile)) { | |
263 warn "Warning: Ignoring file $SDFile:Output file name, $OutFile, is same as input SD file name, $SDFile\n"; | |
264 next FILELIST; | |
265 } | |
266 if (!$Options{overwrite}) { | |
267 if (-e $OutFile) { | |
268 warn "Warning: Ignoring file $SDFile: The file $OutFile already exists\n"; | |
269 next FILELIST; | |
270 } | |
271 } | |
272 # Get data field names and values... | |
273 my($CmpdString, $FieldName, @CmpdLines, @DataFieldNames, %DataFieldNamesMap); | |
274 @DataFieldNames = (); | |
275 if (!open(SDFILE, "$SDFile")) { | |
276 warn "Warning: Ignoring file $SDFile: Couldn't open it: $! \n"; | |
277 next FILELIST; | |
278 } | |
279 $CmpdString = ReadCmpdString(\*SDFILE); | |
280 close SDFILE; | |
281 | |
282 @CmpdLines = split "\n", $CmpdString; | |
283 @DataFieldNames = GetCmpdDataHeaderLabels(\@CmpdLines); | |
284 %DataFieldNamesMap = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines); | |
285 | |
286 # Setup formula field name... | |
287 $FormulaFieldName = ''; | |
288 if ($OptionsInfo{UseDataField}) { | |
289 if ($OptionsInfo{SpecifiedFormulaFieldName}) { | |
290 $FormulaFieldName = $OptionsInfo{SpecifiedFormulaFieldName}; | |
291 } | |
292 else { | |
293 FIELDNAME: for $FieldName (@DataFieldNames) { | |
294 if ($FieldName =~ /Formula/i) { | |
295 $FormulaFieldName = $FieldName; | |
296 last FIELDNAME; | |
297 } | |
298 } | |
299 if (!$FormulaFieldName) { | |
300 warn "Warning: Ignoring file $SDFile: Data field label containing the word Formula doesn't exist\n"; | |
301 next FILELIST; | |
302 } | |
303 } | |
304 } | |
305 $SDFilesInfo{FileOkay}[$Index] = 1; | |
306 $SDFilesInfo{OutFile}[$Index] = $OutFile; | |
307 $SDFilesInfo{FormulaFieldName}[$Index] = $FormulaFieldName; | |
308 | |
309 # Setup value data field names for calculated values... | |
310 for $Value (keys %NewValueFieldNames) { | |
311 $FieldName = $NewValueFieldNames{$Value}; | |
312 | |
313 # Make sure it doesn't already exists... | |
314 $Count = 1; | |
315 $NewFieldName = $FieldName; | |
316 while (exists $DataFieldNamesMap{$NewFieldName}) { | |
317 $Count++; | |
318 $NewFieldName = $FieldName . $Count; | |
319 } | |
320 $SDFilesInfo{ValueFieldNamesMap}[$Index]{$Value} = $NewFieldName; | |
321 } | |
322 } | |
323 } | |
324 | |
325 # Process option values... | |
326 sub ProcessOptions { | |
327 %OptionsInfo = (); | |
328 | |
329 $OptionsInfo{Mode} = $Options{mode}; | |
330 $OptionsInfo{FormulaMode} = $Options{formulamode}; | |
331 | |
332 $OptionsInfo{WriteOutFormula} = ($Options{formulaout} =~ /^Yes$/i) ? 1 : 0; | |
333 | |
334 $OptionsInfo{UseStructureData} = ($Options{formulamode} =~ /^StructureData$/i) ? 1 : 0; | |
335 $OptionsInfo{UseDataField} = ($Options{formulamode} =~ /^DataField$/i) ? 1 : 0; | |
336 | |
337 $OptionsInfo{Fast} = defined $Options{fast} ? $Options{fast} : undef; | |
338 | |
339 $OptionsInfo{DetailLevel} = $Options{detail}; | |
340 $OptionsInfo{CheckFormula} = $Options{fast} ? 0 : 1; | |
341 $OptionsInfo{Precision} = $Options{precision}; | |
342 | |
343 $OptionsInfo{Overwrite} = defined $Options{overwrite} ? $Options{overwrite} : undef; | |
344 | |
345 $OptionsInfo{FormulaField} = defined $Options{formulafield} ? $Options{formulafield} : undef; | |
346 $OptionsInfo{SpecifiedFormulaFieldName} = ""; | |
347 | |
348 if (defined $Options{formulafield}) { | |
349 $OptionsInfo{SpecifiedFormulaFieldName} = $Options{formulafield}; | |
350 } | |
351 # Setup what to calculate... | |
352 @{$OptionsInfo{SpecifiedCalculations}} = (); | |
353 if ($Options{mode} =~ /^All$/i) { | |
354 @{$OptionsInfo{SpecifiedCalculations}} = qw(ElementalAnalysis MolecularWeight ExactMass); | |
355 } | |
356 else { | |
357 my($Mode, $ModeValue, @SpecifiedModeValues); | |
358 $Mode = $Options{mode}; | |
359 $Mode =~ s/ //g; | |
360 @SpecifiedModeValues = split /\,/, $Mode; | |
361 for $ModeValue (@SpecifiedModeValues) { | |
362 if ($ModeValue !~ /^(ElementalAnalysis|MolecularWeight|ExactMass)$/i) { | |
363 if ($ModeValue =~ /^All$/i) { | |
364 die "Error: All value for option \"-m --mode\" is not allowed with other valid values.\n"; | |
365 } | |
366 else { | |
367 die "Error: The value specified, $ModeValue, for option \"-m --mode\" is not valid. Allowed values: ElementalAnalysis, MolecularWeight, or ExactMass\n"; | |
368 } | |
369 } | |
370 push @{$OptionsInfo{SpecifiedCalculations}}, $ModeValue; | |
371 } | |
372 } | |
373 | |
374 $OptionsInfo{ValueFieldNames} = defined $Options{valuefieldnames} ? $Options{valuefieldnames} : undef; | |
375 @{$OptionsInfo{SpecifiedValueFieldNames}} = (); | |
376 | |
377 if ($Options{valuefieldnames}) { | |
378 my($Value, $Label, @ValueLabels); | |
379 @ValueLabels = split /\,/, $Options{valuefieldnames}; | |
380 if (@ValueLabels % 2) { | |
381 die "Error: The value specified, $Options{valuefieldnames}, for option \"-v --valuefieldnames\" is not valid: It must contain even number of comma delimited values\n"; | |
382 } | |
383 my($Index); | |
384 for ($Index = 0; $Index < @ValueLabels; $Index +=2) { | |
385 $Value = $ValueLabels[$Index]; | |
386 $Value =~ s/ //g; | |
387 $Label = $ValueLabels[$Index + 1]; | |
388 if ($Value !~ /^(ElementalAnalysis|MolecularWeight|ExactMass|MolecularFormula)$/i) { | |
389 die "Error: The value specified, $Value, using option \"-v --valuefieldnames\" is not valid. Allowed values: ElementalAnalysis, MolecularWeight, ExactMass, or MolecularFormula\n"; | |
390 } | |
391 push @{$OptionsInfo{SpecifiedValueFieldNames}}, ($Value, $Label); | |
392 } | |
393 } | |
394 } | |
395 | |
396 # Setup script usage and retrieve command line arguments specified using various options... | |
397 sub SetupScriptUsage { | |
398 | |
399 # Retrieve all the options... | |
400 %Options = (); | |
401 $Options{detail} = 1; | |
402 $Options{formulamode} = "DataField"; | |
403 $Options{formulaout} = "No"; | |
404 $Options{mode} = "All"; | |
405 $Options{precision} = 2; | |
406 | |
407 if (!GetOptions(\%Options, "detail|d=i", "fast", "formulafield=s", "formulamode|f=s", "formulaout=s", "mode|m=s", "help|h", "overwrite|o", "precision|p=i", "root|r=s", "valuefieldnames|v=s", "workingdir|w=s")) { | |
408 die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n"; | |
409 } | |
410 if ($Options{workingdir}) { | |
411 if (! -d $Options{workingdir}) { | |
412 die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n"; | |
413 } | |
414 chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n"; | |
415 } | |
416 if (!IsPositiveInteger($Options{detail})) { | |
417 die "Error: The value specified, $Options{detail}, for option \"-d --detail\" is not valid. Allowed values: > 0\n"; | |
418 } | |
419 if ($Options{formulamode} !~ /^(StructureData|DataField)$/i) { | |
420 die "Error: The value specified, $Options{formulamode}, for option \"-f, --formulamode\" is not valid. Allowed values: StructureData or DataField \n"; | |
421 } | |
422 if ($Options{formulaout} !~ /^(Yes|No)$/i) { | |
423 die "Error: The value specified, $Options{formulaout}, for option \"--formulaout\" is not valid. Allowed values: Yes or No \n"; | |
424 } | |
425 if (!IsPositiveInteger($Options{precision})) { | |
426 die "Error: The value specified, $Options{precision}, for option \"-p --precision\" is not valid. Allowed values: > 0 \n"; | |
427 } | |
428 } | |
429 | |
430 __END__ | |
431 | |
432 =head1 NAME | |
433 | |
434 ElementalAnalysisSDFiles.pl - Perform elemental analysis using formula data field in SDFile(s) | |
435 | |
436 =head1 SYNOPSIS | |
437 | |
438 ElementalAnalysisSDFiles.pl SDFile(s)... | |
439 | |
440 ElementalAnalysisSDFiles.pl [B<-d, --detail> infolevel] [B<--fast>] | |
441 [B<--formulafield> SD data field name] [B<-f, --formulamode> I<DataField | StructureData>] | |
442 [B<--formulaout> yes or no] [B<-m, --mode> All | "ElementalAnalysis, [MolecularWeight, ExactMass]"] | |
443 [B<-h, --help>] [B<-o, --overwrite>] [B<-r, --root> rootname] | |
444 [B<-v --valuefieldnames> Name, Label, [Name, Label,...]] [B<-w, --workingdir> dirname] SDFile(s)... | |
445 | |
446 =head1 DESCRIPTION | |
447 | |
448 Perform elemental analysis using molecular formula specified by a data field name or generated | |
449 from structure data in I<SDFile(s)>. | |
450 | |
451 In addition to straightforward molecular formulas - H2O, HCl, C3H7O2N - | |
452 other supported variations are: Ca3(PO4)2, [PCl4]+, [Fe(CN)6]4-, C37H42N2O6+2, Na2CO3.10H2O, | |
453 8H2S.46H2O, and so on. Charges are simply ignored. Isotope symbols in formulas specification, including | |
454 D and T, are not supported. | |
455 | |
456 The file names are separated by space.The valid file extensions are I<.sdf> and I<.sd>. | |
457 All other file names are ignored. All the SD files in a current directory can be specified | |
458 either by I<*.sdf> or the current directory name. | |
459 | |
460 =head1 OPTIONS | |
461 | |
462 =over 4 | |
463 | |
464 =item B<-d, --detail> I<infolevel> | |
465 | |
466 Level of information to print about compound records being ignored. Default: I<1>. Possible | |
467 values: I<1, 2 or 3>. | |
468 | |
469 =item B<--fast> | |
470 | |
471 In this mode, the formula data field specified using B<-f, --formulafield> option is assumed | |
472 to contain valid molecular formula data and initial formula validation check is skipped. | |
473 | |
474 =item B<--formulafield> I<SD data field name> | |
475 | |
476 I<SDFile(s)> data field name containing molecular formulas used for performing | |
477 elemental analysis during I<DataField> value of B<-f, --formulamode> option. | |
478 Default value: I<SD data field containing the word formula in its name>. | |
479 | |
480 This option is ignore during I<StructureData> value of B<-f, --formulamode> option. | |
481 | |
482 =item B<-f, --formulamode> I<DataField | StructureData> | |
483 | |
484 Specify source of molecular formula used for performing elemental analysis: retrieve | |
485 formula using I<SDFile(s)> data field name or generate formula from structure. Possible | |
486 values: I<DataField or StructureData>. Default value: I<DataField>. | |
487 | |
488 =item B<--formulaout> I<yes or no> | |
489 | |
490 Specify whether to write out formula to SD file during I<StructureData> value of | |
491 B<-f, --formulamode> option. Possible values: I<Yes or No>. Default: I<No>. | |
492 | |
493 =item B<-m, --mode> I<All | "ElementalAnalysis,[MolecularWeight,ExactMass]"> | |
494 | |
495 Specify what values to calculate using molecular formula data field or structure data from | |
496 I<SDFile(s)>: calculate all supported values or specify a comma delimited list of values. | |
497 Possible values: I<All | "ElementalAnalysis, [MolecularWeight, ExactMass]">. Default: I<All> | |
498 | |
499 =item B<-h, --help> | |
500 | |
501 Print this help message. | |
502 | |
503 =item B<-o, --overwrite> | |
504 | |
505 Overwrite existing files. | |
506 | |
507 =item B<-p, --precision> I<number> | |
508 | |
509 Precision of calculated values in the output file. Default: up to I<2> decimal places. | |
510 Valid values: positive integers. | |
511 | |
512 =item B<-r, --root> I<rootname> | |
513 | |
514 New SD file name is generated using the root: <Root>.<Ext>. Default new file | |
515 name: <InitialSDFileName>ElementalAnalysis.<Ext>. This option is ignored for multiple | |
516 input files. | |
517 | |
518 =item B<-v --valuefieldnames> I<Name,Label,[Name,Label,...]> | |
519 | |
520 Specify SD data field names to use for calculated values. In general, it's a comma delimited | |
521 list of value name and SD field name pairs. Supported value names: I<ElementalAnalysis, | |
522 MolecularWeight, ExactMass, and MolecularFormula>. Default labels: I<ElementalAnalysis, | |
523 MolecularWeight, ExactMass, and MolecularFormula>. | |
524 | |
525 I<MolecularFormula> label is only used during I<StructureData> value of | |
526 B<-f, --formulamode> option. | |
527 | |
528 =item B<-w, --workingdir> I<dirname> | |
529 | |
530 Location of working directory. Default: current directory. | |
531 | |
532 =back | |
533 | |
534 =head1 EXAMPLES | |
535 | |
536 To perform elemental analysis, calculate molecular weight and exact mass using SD | |
537 field name value with the word Formula in its name and generate a new SD file | |
538 NewSample1.sdf, type: | |
539 | |
540 % ElementalAnalysisSDFiles.pl -o -r NewSample1 Sample1.sdf | |
541 | |
542 To perform elemental analysis, calculate molecular weight and exact mass using | |
543 structure data in SD file and generate a new SD file NewSample1.sdf, type: | |
544 | |
545 % ElementalAnalysisSDFiles.pl --formulamode StructureData -o | |
546 -r NewSample1 Sample1.sdf | |
547 | |
548 To perform elemental analysis using formulas in SD field name Formula, use field name | |
549 Analysis for calculated data, and generate a new SD file NewSample1.sdf, type: | |
550 | |
551 % ElementalAnalysisSDFiles.pl --m ElementalAnalysis --formulafield | |
552 Formula --valuefieldnames "ElementalAnalysis,Analysis" -o | |
553 -r NewSample1 Sample1.sdf | |
554 | |
555 To calculate molecular weight, using formulas in SD field name Formula, with four decimal | |
556 precision and generate a new SD file NewSample1.sdf, type | |
557 | |
558 % ElementalAnalysisSDFiles.pl --m MolecularWeight --formulafield | |
559 Formula --precision 4 -o -r NewSample1 Sample1.sdf | |
560 | |
561 =head1 AUTHOR | |
562 | |
563 Manish Sud <msud@san.rr.com> | |
564 | |
565 =head1 SEE ALSO | |
566 | |
567 AnalyzeSDFilesData.pl, InfoSDFiles.pl, ExtractFromSDFiles.pl | |
568 | |
569 =head1 COPYRIGHT | |
570 | |
571 Copyright (C) 2015 Manish Sud. All rights reserved. | |
572 | |
573 This file is part of MayaChemTools. | |
574 | |
575 MayaChemTools is free software; you can redistribute it and/or modify it under | |
576 the terms of the GNU Lesser General Public License as published by the Free | |
577 Software Foundation; either version 3 of the License, or (at your option) | |
578 any later version. | |
579 | |
580 =cut |