comparison docs/modules/txt/SDFileUtil.txt @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 NAME
2 SDFileUtil
3
4 SYNOPSIS
5 use SDFileUtil ;
6
7 use SDFileUtil qw(:all);
8
9 DESCRIPTION
10 SDFileUtil module provides the following functions:
11
12 GenerateCmpdAtomAliasPropertyLines, GenerateCmpdAtomLine,
13 GenerateCmpdBondLine, GenerateCmpdChargePropertyLines,
14 GenerateCmpdCommentsLine, GenerateCmpdCountsLine,
15 GenerateCmpdDataHeaderLabelsAndValuesLines,
16 GenerateCmpdIsotopePropertyLines, GenerateCmpdMiscInfoLine,
17 GenerateCmpdMolNameLine, GenerateCmpdRadicalPropertyLines,
18 GenerateEmptyCtabBlockLines, GenerateMiscLineDateStamp,
19 GetAllAndCommonCmpdDataHeaderLabels, GetCmpdDataHeaderLabels,
20 GetCmpdDataHeaderLabelsAndValues, GetCmpdFragments, GetCtabLinesCount,
21 GetInvalidAtomNumbers, GetUnknownAtoms, InternalBondOrderToMDLBondType,
22 InternalBondStereochemistryToMDLBondStereo, InternalChargeToMDLCharge,
23 InternalSpinMultiplicityToMDLRadical, IsCmpd2D, IsCmpd3D,
24 MDLBondStereoToInternalBondStereochemistry,
25 MDLBondTypeToInternalBondOrder, MDLChargeToInternalCharge,
26 MDLRadicalToInternalSpinMultiplicity, ParseCmpdAtomAliasPropertyLine,
27 ParseCmpdAtomLine, ParseCmpdBondLine, ParseCmpdChargePropertyLine,
28 ParseCmpdCommentsLine, ParseCmpdCountsLine,
29 ParseCmpdIsotopePropertyLine, ParseCmpdMiscInfoLine,
30 ParseCmpdMolNameLine, ParseCmpdRadicalPropertyLine, ReadCmpdString,
31 RemoveCmpdDataHeaderLabelAndValue, WashCmpd
32
33 METHODS
34 GenerateCmpdAtomAliasPropertyLines
35 @Lines = GenerateCmpdAtomAliasPropertyLines($AliasValuePairsRef);
36
37 Returns formatted atom alias property lines corresponding to
38 successive pairs of atom number and alias values specified by a
39 reference to an array. Two lines are generated for each atom number
40 and alias value pair: First line - A <AtomNum>; Second
41 line - <AtomAlias>.
42
43 GenerateCmpdAtomLine
44 $Line = GenerateCmpdAtomLine($AtomSymbol, $AtomX, $AtomY,
45 $AtomZ, [$MassDifference, $Charge, $StereoParity]);
46
47 Returns a formatted atom data line containing all the input values.
48
49 GenerateCmpdBondLine
50 $Line = GenerateCmpdBondLine($FirstAtomNum, $SecondAtomNum,
51 $BondType, [$BondStereo]);
52
53 Returns a formatted bond data line containing all the input values.
54
55 GenerateCmpdChargePropertyLines
56 @Lines = GenerateCmpdChargePropertyLines($ChargeValuePairsRef);
57
58 Returns formatted M CHG property lines corresponding to successive
59 pairs of atom number and charge values specified by a reference to an
60 array.
61
62 GenerateCmpdCommentsLine
63 $Line = GenerateCmpdCommentsLine($Comments);
64
65 Returns a formatted comments data line.
66
67 GenerateCmpdCountsLine
68 $Line = GenerateCmpdCountsLine($AtomCount, $BondCount,
69 $ChiralFlag, [$PropertyCount, $Version]);
70
71 Returns a formatted line containing all the input values. The
72 default values of 999 and V2000 are used for *PropertyCount* and
73 *Version*.
74
75 GenerateCmpdDataHeaderLabelsAndValuesLines
76 @Lines = GenerateCmpdDataHeaderLabelsAndValuesLines(
77 $DataHeaderLabelsRef, $DataHeaderLabelsAndValuesRef,
78 [$SortDataLabels]);
79
80 Returns formatted data lines containing header label and values
81 lines corresponding to all data header labels in array reference
82 *DataHeaderLabelsRef* with values in hash reference
83 *DataHeaderLabelsAndValuesRef*. By default, data header labels are
84 not sorted and correspond to the label order in array reference
85 *DataHeaderLabelsRef*.
86
87 GenerateCmpdIsotopePropertyLines
88 @Lines = GenerateCmpdIsotopePropertyLines($IsotopeValuePairsRef);
89
90 Returns formatted M ISO property lines corresponding to successive
91 pairs of atom number and isotope values specified by a reference to
92 an array.
93
94 GenerateCmpdMiscInfoLine
95 $Line = GenerateCmpdMiscInfoLine([$ProgramName, $UserInitial,
96 $Code]);
97
98 Returns a formatted line containing specified user initial, program
99 name, date and code. Default values are: *ProgramName - MayaChem;
100 UserInitial - NULL; Code - 2D*.
101
102 GenerateCmpdMolNameLine
103 $Line = GenerateCmpdMolNameLine($MolName);
104
105 Returns a formatted molecule name data line.
106
107 GenerateCmpdRadicalPropertyLines
108 @Lines = GenerateCmpdRadicalPropertyLines($RadicalValuePairsRef);
109
110 Returns formatted M RAD property lines corresponding to successive
111 pairs of atom number and multiplicity values specified by a reference
112 to an array.
113
114 GenerateEmptyCtabBlockLines
115 $Lines = GenerateEmptyCtabBlockLines([$Date]);
116
117 Returns formatted lines representing empty CTAB block.
118
119 GenerateMiscLineDateStamp
120 $Line = GenerateMiscLineDateStamp();
121
122 Returns date stamp for misc line.
123
124 GetAllAndCommonCmpdDataHeaderLabels
125 ($CmpdCount, $DataFieldLabelsArrayRef,
126 $CommonDataFieldLabelsArrayRef) =
127 GetAllAndCommonCmpdDataHeaderLabels(\*SDFILE);
128
129 Returns number of compounds, a reference to an array containing all
130 unique data header labels and a reference to an array containing
131 common data field labels for all compounds in SD file.
132
133 GetCmpdDataHeaderLabels
134 (@Labels) = GetCmpdDataHeaderLabels(\@CmpdLines);
135
136 Returns an array containing data header labels for a compound.
137
138 GetCmpdDataHeaderLabelsAndValues
139 (%DataValues) = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
140
141 Returns a hash containing data header labels and values for a compound.
142
143 GetCmpdFragments
144 ($FragmentCount, $FragmentString) = GetCmpdFragments(\@CmpdLines);
145
146 Figures out the number of disconnected fragments and returns their
147 values along with the atom numbers in a string delimited by new line
148 character. Fragment data in FragmentString is sorted based on its
149 size.
150
151 GetCtabLinesCount
152 $CtabLinesCount = GetCtabLinesCount(\@CmpdLines);
153
154 Returns number of lines present between the 4th line and the line
155 containing "M END".
156
157 GetInvalidAtomNumbers
158 ($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines) =
159 GetInvalidAtomNumbers(\@CmpdLines);
160
161 Returns a list of values containing information about invalid atom
162 numbers present in block or atom property lines.
163
164 GetUnknownAtoms
165 ($UnknownAtomCount, $UnknownAtoms, $UnknownAtomLines) =
166 GetUnknownAtoms(\@CmpdLines);
167
168 Returns a list of values containing information about atoms which
169 contain special element symbols not present in the periodic table.
170
171 InternalBondOrderToMDLBondType
172 $MDLBondType = InternalBondOrderToMDLBondType($InternalBondOrder);
173
174 Returns value of *MDLBondType* corresponding to *InternalBondOrder*.
175
176 InternalBondOrder MDLBondType
177
178 1 1
179 2 2
180 3 3
181 1.5 4
182
183 InternalBondStereochemistryToMDLBondStereo
184 $MDLBondStereo = InternalBondStereochemistryToMDLBondStereo(
185 $InternalBondStereo);
186
187 Returns value of *MDLBondStereo* corresponding to
188 *InternalBondStereo* using following mapping:
189
190 InternalBondStereo MDLBondStereo
191
192 Up 1
193 UpOrDown 4
194 Down 6
195 CisOrTrans 3
196 Other 0
197
198 InternalChargeToMDLCharge
199 $MDLCharge = InternalChargeToMDLCharge($InternalCharge);
200
201 Returns value of *MDLCharge* corresponding to *InternalCharge* using
202 following mapping:
203
204 InternalCharge MDLCharge
205
206 3 1
207 2 2
208 1 3
209 -1 5
210 -2 6
211 -3 7
212
213 InternalSpinMultiplicityToMDLRadical
214 $MDLRadical = InternalSpinMultiplicityToMDLRadical(
215 $InternalSpinMultiplicity);
216
217 Returns value of *MDLRadical* corresponding to
218 *InternalSpinMultiplicity*. These values are equivalent.
219
220 MDLBondStereoToInternalBondType
221 $InternalBondType = MDLBondStereoToInternalBondType($MDLBondStereo);
222
223 Returns value of *InternalBondType* corresponding to *MDLBondStereo*
224 using mapping shown for InternalBondTypeToMDLBondStereo function.
225
226 IsCmpd2D
227 $Status = IsCmpd2D();
228
229 Returns 1 if the z-coordinates of all atoms are zero; otherwise,
230 returns 0.
231
232 IsCmpd3D
233 $Status = IsCmpd3D();
234
235 Returns 1 if the z-coordinate of any atom is non-zero; otherwise,
236 returns 0.
237
238 MDLBondStereoToInternalBondStereochemistry
239 $InternalBondStereo = MDLBondStereoToInternalBondStereochemistry(
240 $MDLBondStereo);
241
242 Returns value of *InternalBondStereo* corresponding to
243 *MDLBondStereo* using mapping shown for
244 InternalBondStereochemistryToMDLBondStereo function.
245
246 MDLBondTypeToInternalBondOrder
247 $InternalBondOrder = MDLBondTypeToInternalBondOrder($MDLBondType);
248
249 Returns value of *InternalBondOrder* corresponding to *MDLBondType*
250 using mapping shown for InternalBondOrderToMDLBondType function.
251
252 MDLChargeToInternalCharge
253 $InternalCharge = MDLChargeToInternalCharge($MDLCharge);
254
255 Returns value of *InternalCharge* corresponding to *MDLCharge*
256 using mapping shown for InternalChargeToMDLCharge function.
257
258 MDLRadicalToInternalSpinMultiplicity
259 $InternalSpinMultiplicity = MDLRadicalToInternalSpinMultiplicity(
260 $MDLRadical);
261
262 Returns value of *InternalSpinMultiplicity* corresponding to
263 *MDLRadical*. These values are equivalent.
264
265 ParseCmpdAtomAliasPropertyLine
266 @AtomNumAndValuePairs = ParseCmpdAtomAliasPropertyLine(
267 $CurrentLine, $NextLine);
268
269 Parses atom alias property lines in CTAB generic properties block and
270 returns an array with successive pairs of values corresponding to
271 atom number and its alias.
272
273 ParseCmpdAtomLine
274 ($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge,
275 $StereoParity) = ParseCmpdAtomLine($AtomDataLine);
276
277 Parses compound data line containing atom information and returns a
278 list of values.
279
280 ParseCmpdBondLine
281 ($FirstAtomNum, $SecondAtomNum, $BondType) =
282 ParseCmpdBondLine($BondDataLine);
283
284 Parses compound data line containing bond information and returns a
285 list of values.
286
287 ParseCmpdCommentsLine
288 $Comments = ParseCmpdCommentsLine($CommentsDataLine);
289
290 Returns the comment string.
291
292 ParseCmpdChargePropertyLine
293 @AtomNumAndValuePairs = ParseCmpdChargePropertyLine(
294 $ChargeDataLine);
295
296 Parses charge property line in CTAB generic properties block and
297 returns an array with successive pairs of values corresponding to
298 atom number and its charge.
299
300 ParseCmpdCountsLine
301 ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version) =
302 ParseCmpdCountsLine(\@CountDataLines);
303
304 Returns a list of values containing count information.
305
306 ParseCmpdMiscInfoLine
307 ($UserInitial, $ProgramName, $Date, $Code, $ScalingFactor1, $ScalingFactor2,
308 $Energy, $RegistryNum) = ParseCmpdMiscInfoLine($Line);
309
310 Returns a list of values containing miscellaneous information.
311
312 ParseCmpdIsotopePropertyLine
313 @AtomNumAndValuePairs = ParseCmpdIsotopePropertyLine(
314 $IsotopeDataLine);
315
316 Parses isotopic property line in CTAB generic properties block and
317 returns an array with successive pairs of values corresponding to
318 atom number and absolute mass of atom isotope.
319
320 ParseCmpdMolNameLine
321 $MolName = ParseCmpdMolNameLine($Line);
322
323 Returns a string containing molecule name.
324
325 ParseCmpdRadicalPropertyLine
326 @AtomNumAndValuePairs = ParseCmpdRadicalPropertyLine(
327 $RadicalDataLine);
328
329 Parses radical property line in CTAB generic properties block and
330 returns an array with successive pairs of values corresponding to
331 atom number and radical number value.
332
333 RemoveCmpdDataHeaderLabelAndValue
334 $NewCmpdString = RemoveCmpdDataHeaderLabelAndValue($CmpdString,
335 $DataHeaderLabel);
336
337 Returns a NewCmpdString after removing *DataHeaderLabel* along with
338 its value from *CmpdString*.
339
340 ReadCmpdString
341 $CmpdString = ReadCmpdString(\*SDFILEHANDLE);
342
343 Returns a string containing all the data lines for the next
344 available compound in an already open file indicated by
345 SDFILEHANDLE. A NULL string is returned on EOF.
346
347 WashCmpd
348 ($FragmentCount, $Fragments, $WashedCmpdString) =
349 WashCmpd(\@CmpdLines);
350
351 Figures out the number of disconnected fragments and returns their
352 values along with the atom numbers in a string delimited by new line
353 character. Fragment data in FragmentString is sorted based on its
354 size.
355
356 AUTHOR
357 Manish Sud <msud@san.rr.com>
358
359 SEE ALSO
360 TextUtil.pm
361
362 COPYRIGHT
363 Copyright (C) 2015 Manish Sud. All rights reserved.
364
365 This file is part of MayaChemTools.
366
367 MayaChemTools is free software; you can redistribute it and/or modify it
368 under the terms of the GNU Lesser General Public License as published by
369 the Free Software Foundation; either version 3 of the License, or (at
370 your option) any later version.
371