annotate mayachemtools/lib/SDFileUtil.pm @ 9:ab29fa5c8c1f draft default tip

Uploaded
author deepakjadmin
date Thu, 15 Dec 2016 14:18:03 -0500
parents 73ae111cf86f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1 package SDFileUtil;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
2 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
3 # $RCSfile: SDFileUtil.pm,v $
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
4 # $Date: 2015/02/28 20:47:18 $
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
5 # $Revision: 1.49 $
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
6 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
7 # Author: Manish Sud <msud@san.rr.com>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
8 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
10 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
11 # This file is part of MayaChemTools.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
12 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
14 # the terms of the GNU Lesser General Public License as published by the Free
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
15 # Software Foundation; either version 3 of the License, or (at your option) any
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
16 # later version.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
17 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
18 # MayaChemTools is distributed in the hope that it will be useful, but without
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
19 # any warranty; without even the implied warranty of merchantability or fitness
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
20 # for a particular purpose. See the GNU Lesser General Public License for more
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
21 # details.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
22 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
23 # You should have received a copy of the GNU Lesser General Public License
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
26 # Boston, MA, 02111-1307, USA.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
27 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
28
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
29 use strict;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
30 use Exporter;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
31 use Carp;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
32 use PeriodicTable qw(IsElement);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
33 use TimeUtil ();
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
34
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
35 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
36
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
37 @ISA = qw(Exporter);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
38 @EXPORT = qw(GenerateCmpdAtomLine GenerateCmpdBondLine GenerateCmpdChargePropertyLines GenerateCmpdCommentsLine GenerateCmpdCountsLine GenerateCmpdAtomAliasPropertyLines GenerateCmpdIsotopePropertyLines GenerateCmpdDataHeaderLabelsAndValuesLines GenerateCmpdMiscInfoLine GenerateCmpdRadicalPropertyLines GenerateCmpdMolNameLine GenerateEmptyCtabBlockLines GenerateMiscLineDateStamp GetAllAndCommonCmpdDataHeaderLabels GetCmpdDataHeaderLabels GetCmpdDataHeaderLabelsAndValues GetCmpdFragments GetCtabLinesCount GetUnknownAtoms GetInvalidAtomNumbers MDLChargeToInternalCharge InternalChargeToMDLCharge MDLBondTypeToInternalBondOrder InternalBondOrderToMDLBondType MDLBondStereoToInternalBondStereochemistry InternalBondStereochemistryToMDLBondStereo InternalSpinMultiplicityToMDLRadical MDLRadicalToInternalSpinMultiplicity IsCmpd3D IsCmpd2D ParseCmpdAtomLine ParseCmpdBondLine ParseCmpdCommentsLine ParseCmpdCountsLine ParseCmpdMiscInfoLine ParseCmpdMolNameLine ParseCmpdAtomAliasPropertyLine ParseCmpdChargePropertyLine ParseCmpdIsotopePropertyLine ParseCmpdRadicalPropertyLine ReadCmpdString RemoveCmpdDataHeaderLabelAndValue WashCmpd);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
39 @EXPORT_OK = qw();
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
40 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
41
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Format the counts line of a V2000 connection table.
#
# Arguments (positional; trailing ones are optional):
#   $AtomCount     - number of atoms
#   $BondCount     - number of bonds
#   $ChiralFlag    - chiral flag; defaults to 0
#   $PropertyCount - number of additional property lines; defaults to 999
#   $Version       - CTAB version string; defaults to "V2000"
#
# Returns the counts line: eleven 3-column integer fields followed by a
# 6-column version field, without a trailing newline.
#
# Croaks when the atom or bond count does not fit into its 3-column field.
# Each optional argument is defaulted on its own, so a call with four
# arguments honors the chiral flag and property count it was given.
#
sub GenerateCmpdCountsLine {
  my($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version) = @_;
  my($Line);

  $ChiralFlag = 0 unless defined $ChiralFlag;
  $PropertyCount = 999 unless defined $PropertyCount;
  $Version = "V2000" unless defined $Version;

  if ($AtomCount > 999) {
    croak "Error: SDFileUtil::GenerateCmpdCountsLine: The atom count, $AtomCount, exceeds maximum of 999 allowed for CTAB version 2000. The Extended Connection Table (V3000) format in MDL MOL and SD files is not supported by the current release of MayaChemTools...";
  }
  # A bond count above 999 would spill out of its 3-column field and shift
  # every following field, so reject it the same way as the atom count.
  if ($BondCount > 999) {
    croak "Error: SDFileUtil::GenerateCmpdCountsLine: The bond count, $BondCount, exceeds maximum of 999 allowed for CTAB version 2000. The Extended Connection Table (V3000) format in MDL MOL and SD files is not supported by the current release of MayaChemTools...";
  }

  $Line = sprintf "%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i%6s", $AtomCount, $BondCount, 0, 0, $ChiralFlag, 0, 0, 0, 0, 0, $PropertyCount, $Version;

  return ($Line);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
67
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Generate the comments line of a compound header block.
#
# Takes the comment text and returns it limited to the first 80
# characters. An undefined argument yields an empty line instead of
# propagating undef to the caller.
#
sub GenerateCmpdCommentsLine {
  my($Comments) = @_;

  return '' unless defined $Comments;

  # substr() returns the whole string when it is shorter than the limit,
  # so no separate length test is needed.
  return substr($Comments, 0, 80);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
77
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Generate the molecule name line of a compound header block.
#
# Takes the molecule name and returns it limited to the first 80
# characters. An undefined argument yields an empty line instead of
# propagating undef to the caller.
#
sub GenerateCmpdMolNameLine {
  my($MolName) = @_;

  return '' unless defined $MolName;

  # substr() returns the whole string when it is shorter than the limit,
  # so no separate length test is needed.
  return substr($MolName, 0, 80);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
87
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Generate the misc info line (second header line) of a compound block.
#
# Arguments (all optional; an undefined or false value selects the default):
#   $ProgramName - program name, 8 columns; defaults to "MayaChem"
#   $UserInitial - user initials, 2 columns; defaults to two blanks
#   $Code        - dimensional code, 2 columns; defaults to "2D"
#
# Returns the line as user initials, program name, date stamp and code,
# without a trailing newline. The date stamp comes from
# GenerateMiscLineDateStamp() and is inserted as returned.
#
# The leading fields are fixed-width columns, so each value is both
# truncated and blank-padded to its width; a short program name or a single
# initial no longer shifts the date and dimensional code columns.
#
sub GenerateCmpdMiscInfoLine {
  my($ProgramName, $UserInitial, $Code) = @_;
  my($Date, $Line);

  if (!(defined($ProgramName) && $ProgramName)) {
    $ProgramName = "MayaChem";
  }
  if (!(defined($UserInitial) && $UserInitial)) {
    # The user initials field is two columns wide.
    $UserInitial = "  ";
  }
  if (!(defined($Code) && $Code)) {
    $Code = "2D";
  }

  $Date = GenerateMiscLineDateStamp();

  # %-W.Ws left-justifies, pads to W columns and truncates to W characters.
  $Line = sprintf "%-2.2s%-8.8s%s%-2.2s", $UserInitial, $ProgramName, $Date, $Code;

  return $Line;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
118
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Generate the lines of an empty connection table block.
#
# Takes an optional date stamp; when it is missing or undefined, a stamp
# is obtained from GenerateMiscLineDateStamp().
#
# Returns a single string holding five newline-separated lines, with no
# newline after the last one:
#   1. blank molecule name line
#   2. misc info line: blank user initials, "MayaChem", date stamp, "2D"
#   3. blank comments line
#   4. counts line for a structure with no atoms and no bonds
#   5. the "M  END" terminator
#
sub GenerateEmptyCtabBlockLines {
  my($Date, $Lines);

  if (@_ == 1 && defined $_[0]) {
    ($Date) = @_;
  }
  else {
    $Date = GenerateMiscLineDateStamp();
  }

  $Lines = join "\n",
    "",
    # Two blanks stand in for the 2-column user initials field.
    "  MayaChem${Date}2D",
    "",
    GenerateCmpdCountsLine(0, 0, 0),
    # The terminator is "M", two blanks, then "END".
    "M  END";

  return $Lines;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
141
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Return the date stamp used in the misc info line of SD file compound
# blocks. The value is whatever TimeUtil::SDFileTimeStamp() produces; it
# is passed through unchanged.
#
sub GenerateMiscLineDateStamp {
  return TimeUtil::SDFileTimeStamp();
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
146
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Generate an atom line for the atom block of a V2000 connection table.
#
# Arguments:
#   $AtomSymbol     - atom symbol, left-justified in a 3-column field
#   $AtomX, $AtomY, $AtomZ - coordinates, each written as %10.4f
#   $MassDifference - optional mass difference; defaults to 0
#   $Charge         - optional charge code; defaults to 0
#   $StereoParity   - optional atom stereo parity; defaults to 0
#
# Returns the atom line without a trailing newline. The nine fields after
# the stereo parity are not supported and are always written as zero; each
# of them occupies three columns, which gives a 69-column line when all
# values fit their fields.
#
sub GenerateCmpdAtomLine {
  my($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity) = @_;
  my($Line);

  $MassDifference = 0 unless defined $MassDifference;
  $Charge = 0 unless defined $Charge;
  $StereoParity = 0 unless defined $StereoParity;

  # Every trailing zero field must be right-justified in three columns
  # ("  0"); narrower fields misalign the line for fixed-column readers.
  $Line = sprintf "%10.4f%10.4f%10.4f %-3s%2i%3i%3i" . ("  0" x 9), $AtomX, $AtomY, $AtomZ, $AtomSymbol, $MassDifference, $Charge, $StereoParity;

  return $Line;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
166
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Generate a bond line for the bond block of a V2000 connection table.
#
# Arguments:
#   $FirstAtomNum  - number of the first atom
#   $SecondAtomNum - number of the second atom
#   $BondType      - bond type code
#   $BondStereo    - optional bond stereo code; defaults to 0
#
# Returns the bond line without a trailing newline: seven 3-column fields,
# of which the last three are not supported and are always written as zero.
#
sub GenerateCmpdBondLine {
  my($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo) = @_;
  my($Line);

  $BondStereo = 0 unless defined $BondStereo;

  # Each trailing zero field is right-justified in three columns ("  0").
  $Line = sprintf "%3i%3i%3i%3i" . ("  0" x 3), $FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo;

  return $Line;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
180
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Generate the charge property lines of a CTAB block.
#
# Takes a reference to the charge value pairs and hands it, tagged as
# 'Charge', to _GenerateCmpdGenericPropertyLines(); whatever that helper
# returns is returned unchanged.
#
sub GenerateCmpdChargePropertyLines {
  my $ValuePairsRef = shift;

  return _GenerateCmpdGenericPropertyLines('Charge', $ValuePairsRef);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
188
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Generate the isotope property lines of a CTAB block.
#
# Takes a reference to the isotope value pairs and hands it, tagged as
# 'Isotope', to _GenerateCmpdGenericPropertyLines(); whatever that helper
# returns is returned unchanged.
#
sub GenerateCmpdIsotopePropertyLines {
  my $ValuePairsRef = shift;

  return _GenerateCmpdGenericPropertyLines('Isotope', $ValuePairsRef);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
196
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Generate the radical property lines of a CTAB block.
#
# Takes a reference to the radical value pairs and hands it, tagged as
# 'Radical', to _GenerateCmpdGenericPropertyLines(); whatever that helper
# returns is returned unchanged.
#
sub GenerateCmpdRadicalPropertyLines {
  my $ValuePairsRef = shift;

  return _GenerateCmpdGenericPropertyLines('Radical', $ValuePairsRef);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
204
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Generate the atom alias property lines of a CTAB block.
#
# Atom alias property format (two lines per alias):
#
#   A  aaa
#   x...
#
#   aaa: atom number, right-justified in three columns
#   x:   atom alias text, on the following line
#
# Takes a reference to a flat list of (atom number, alias) pairs and
# returns the list of generated lines, two per pair, in input order. A
# trailing atom number without an alias is ignored.
#
sub GenerateCmpdAtomAliasPropertyLines {
  my($PropertyValuePairsRef) = @_;
  my($Index, $AtomNum, $AtomAlias, @PropertyLines);

  @PropertyLines = ();

  # Stop before the last index so that $Index + 1 always addresses an
  # existing alias element.
  for ($Index = 0; $Index < $#{$PropertyValuePairsRef}; $Index += 2) {
    $AtomNum = $PropertyValuePairsRef->[$Index];
    $AtomAlias = $PropertyValuePairsRef->[$Index + 1];

    # The "A" tag is followed by two blanks, then the 3-column atom number.
    push @PropertyLines, sprintf("A  %3i", $AtomNum), $AtomAlias;
  }

  return @PropertyLines;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
233
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Generate the data header label and value lines of a compound.
#
# Arguments:
#   $DataHeaderLabelsRef          - reference to the list of data labels
#   $DataHeaderLabelsAndValuesRef - reference to a hash mapping labels to values
#   $SortDataLabels               - optional flag; when true the labels are
#                                   written in sorted order, otherwise in the
#                                   order given
#
# Returns a list with three lines per label: the "> <label>" header line,
# the value line and an empty separator line. A label that is missing from
# the hash, or whose value is undefined, gets an empty value line.
#
sub GenerateCmpdDataHeaderLabelsAndValuesLines {
  my($DataHeaderLabelsRef, $DataHeaderLabelsAndValuesRef, $SortDataLabels) = @_;
  my($DataLabel, $DataValue, @DataLabels, @DataLines);

  if ($SortDataLabels) {
    @DataLabels = sort @{$DataHeaderLabelsRef};
  }
  else {
    @DataLabels = @{$DataHeaderLabelsRef};
  }

  @DataLines = ();
  for $DataLabel (@DataLabels) {
    $DataValue = '';
    if (exists $DataHeaderLabelsAndValuesRef->{$DataLabel}) {
      $DataValue = $DataHeaderLabelsAndValuesRef->{$DataLabel};
      # A key stored with an undefined value is written as an empty value
      # rather than being interpolated as undef.
      $DataValue = '' unless defined $DataValue;
    }
    push @DataLines, ("> <${DataLabel}>", "$DataValue", "");
  }
  return @DataLines;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
261
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Collect the data field labels used in an SD file.
#
# Takes the SD file handle reference that is passed on to ReadCmpdString()
# and reads compounds until ReadCmpdString() returns a false value.
#
# Returns a three element list:
#   - the number of compounds read
#   - a reference to the list of all data field labels, in order of first
#     appearance
#   - a reference to the list of labels present in every compound, in the
#     same order
#
# Labels are extracted from each compound with GetCmpdDataHeaderLabels().
#
sub GetAllAndCommonCmpdDataHeaderLabels {
  my($SDFileRef) = @_;
  my($CmpdCount, $CmpdString, $Label, @CmpdLines, @CmpdDataFieldLabels, @DataFieldLabels, @CommonDataFieldLabels, %CmpdDataFieldLabelsMap, %DataFieldLabelsMap);

  $CmpdCount = 0;
  @DataFieldLabels = ();
  @CommonDataFieldLabels = ();
  %DataFieldLabelsMap = ();

  CMPD: while ($CmpdString = ReadCmpdString($SDFileRef)) {
    $CmpdCount++;
    @CmpdLines = split "\n", $CmpdString;
    @CmpdDataFieldLabels = GetCmpdDataHeaderLabels(\@CmpdLines);

    # Only the labels of the very first compound start out as common. The
    # decision is made on the compound count and not on the label list being
    # empty: when leading compounds carry no data fields, labels that first
    # show up later are missing from those compounds and must not be
    # reported as present in all.
    if ($CmpdCount == 1) {
      @DataFieldLabels = @CmpdDataFieldLabels;
      for $Label (@DataFieldLabels) {
        $DataFieldLabelsMap{$Label} = "PresentInAll";
      }
      next CMPD;
    }

    # Set up a lookup map for the labels of the current compound...
    %CmpdDataFieldLabelsMap = ();
    for $Label (@CmpdDataFieldLabels) {
      $CmpdDataFieldLabelsMap{$Label} = "PresentInSome";
    }

    # Known labels missing from this compound are no longer common...
    for $Label (@DataFieldLabels) {
      if (!$CmpdDataFieldLabelsMap{$Label}) {
        $DataFieldLabelsMap{$Label} = "PresentInSome";
      }
    }

    # Labels seen for the first time are added; earlier compounds did not
    # have them, so they cannot be common either...
    for $Label (@CmpdDataFieldLabels) {
      if (!$DataFieldLabelsMap{$Label}) {
        push @DataFieldLabels, $Label;
        $DataFieldLabelsMap{$Label} = "PresentInSome";
      }
    }
  }

  # Identify the common data field labels...
  for $Label (@DataFieldLabels) {
    if ($DataFieldLabelsMap{$Label} eq "PresentInAll") {
      push @CommonDataFieldLabels, $Label;
    }
  }
  return ($CmpdCount, \@DataFieldLabels, \@CommonDataFieldLabels);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
317
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Extract the data header labels of a compound and return them as a list.
#
# Data item layout in an SD file:
#
#   > Data header line
#   Data line(s)
#   Blank line
#
# A data header is a single line that starts with a greater than (>) sign
# and contains at least one of:
#   - the field name enclosed in angle brackets, e.g. <melting.point>
#   - the field number, DTn, where n is the number assigned to the field in
#     a MACCS-II database
#
# It may additionally carry the compound's external and internal registry
# numbers (external ones enclosed in parentheses), in any combination.
#
# Examples of valid data headers:
#
#   > <MELTING.POINT>
#   > 55 (MD-08974) <BOILING.POINT> DT12
#   > DT12 55
#   > (MD-0894) <BOILING.POINT> FROM ARCHIVES
#
# Note: sometimes the final blank line is missing and the data is directly
# followed by $$$$.
#
# Takes a reference to the list of compound lines. Only lines starting with
# ">" are examined; from each of them the first angle-bracketed name is
# taken, with the brackets removed. Header lines without such a name are
# skipped. Labels are returned in line order.
#
sub GetCmpdDataHeaderLabels {
  my($CmpdLines) = @_;
  my(@Labels) = ();

  for my $Line (@$CmpdLines) {
    # Not a data header line...
    next unless $Line =~ /^>/;

    # In list context the first element is the first <...> match, if any...
    my($Label) = $Line =~ /<.*?>/g;
    next unless defined($Label);

    $Label =~ s/(<|>)//g;
    push @Labels, $Label;
  }
  return (@Labels);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
362
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
363 # Parse all the data header labels and values
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Parse all the data header labels and their values for a compound.
#
# Parameters:
#   $CmpdLines - reference to an array holding the lines of one compound record.
#
# Returns:
#   A hash (as a flat key/value list) mapping each label to its value. Multi-line
#   values are joined with "\n"; a label without any value lines maps to ''.
#
# Notes:
#   . Parsing stops at the first line starting with $$$$.
#   . A blank line terminates the value of the current label.
#   . Lines seen while no label is active (including '>' header lines without a
#     <label>) are ignored.
#   . A '>' header line without a <label> seen while a value is being collected
#     is appended to that value.
#   . Labels are tested by length, not truthiness, so a field named <0> is kept.
#
sub GetCmpdDataHeaderLabelsAndValues {
  my($CmpdLines) = @_;
  my($CmpdLine, $CurrentLabel, $Label, $ValueCount, $ProcessingLabelData, %DataFields);

  %DataFields = ();
  $CurrentLabel = '';
  $ProcessingLabelData = 0;
  $ValueCount = 0;

  CMPDLINE: for $CmpdLine (@$CmpdLines) {
    if ($CmpdLine =~ /^\$\$\$\$/) {
      last CMPDLINE;
    }

    if ($CmpdLine =~ /^>/) {
      # Does the line contain a field name enclosed in angle brackets?
      ($Label) = $CmpdLine =~ /<.*?>/g;
      if (defined $Label) {
        $CurrentLabel = $Label;
        $CurrentLabel =~ s/(<|>)//g;
        $ProcessingLabelData = 0;
        $ValueCount = 0;

        # Use length instead of truthiness: "0" is a valid field name...
        if (length $CurrentLabel) {
          $ProcessingLabelData = 1;
          $DataFields{$CurrentLabel} = '';
          next CMPDLINE;
        }
      }
    }

    if (!$ProcessingLabelData) {
      # Data line not associated with any <label>, as allowed by SD format. Just ignore it...
      next CMPDLINE;
    }

    if (!(defined($CmpdLine) && length($CmpdLine))) {
      # Blank line terminates value for a label...
      $CurrentLabel = '';
      $ValueCount = 0;
      $ProcessingLabelData = 0;
      next CMPDLINE;
    }

    $ValueCount++;
    if ($ValueCount > 1) {
      $DataFields{$CurrentLabel} .= "\n" . $CmpdLine;
    }
    else {
      $DataFields{$CurrentLabel} = $CmpdLine;
    }
  }
  return (%DataFields);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
419
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
420 # Return an updated compound string after removing data header label along with its
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
421 # value from the specified compound string...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
422 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Remove a data field, i.e. its header line and value lines, from a compound string.
#
# Parameters:
#   $CmpdString      - compound record as a single newline-delimited string.
#   $DataHeaderLabel - name of the data field to remove, without angle brackets.
#                      It is matched literally and case-insensitively; regex
#                      metacharacters in the name (., (, ), +, ...) have no
#                      special meaning.
#
# Returns:
#   The compound string without the header line of the specified field, its value
#   lines and the blank line terminating the value. A $$$$ line ending the value
#   (missing blank line) is retained. Remaining lines are rejoined with "\n".
#
sub RemoveCmpdDataHeaderLabelAndValue {
  my($CmpdString, $DataHeaderLabel) = @_;
  my($Line, $ProcessingDataHeaderLabel, @CmpdLines);

  @CmpdLines = ();
  $ProcessingDataHeaderLabel = 0;

  CMPDLINE: for $Line (split "\n", $CmpdString) {
    # \Q...\E makes sure the label is matched as a literal string...
    if ($Line =~ /^>/ && $Line =~ /<\Q$DataHeaderLabel\E>/i) {
      $ProcessingDataHeaderLabel = 1;
      next CMPDLINE;
    }

    if ($ProcessingDataHeaderLabel) {
      if ($Line =~ /^\$\$\$\$/) {
        # Blank line is missing: value is terminated by the record delimiter,
        # which must be kept...
        push @CmpdLines, $Line;
        $ProcessingDataHeaderLabel = 0;
      }
      elsif (!length($Line)) {
        # Blank line indicates end of the data value...
        $ProcessingDataHeaderLabel = 0;
      }
      next CMPDLINE;
    }

    # Track compound lines not belonging to the removed data field...
    push @CmpdLines, $Line;
  }

  return join "\n", @CmpdLines;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
454
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
455 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
456 # Using bond blocks, figure out the number of disconnected fragments and
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
457 # return their values along with the atom numbers in a string delimited by new
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
458 # line character.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
459 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Using the bond block, figure out the disconnected fragments of a compound.
#
# Parameters:
#   $CmpdLines - reference to an array holding the lines of one compound record;
#                the counts line is read from index 3 and the bond lines from the
#                indices following the atom lines.
#
# Returns:
#   ($FragmentCount, $FragmentString) where $FragmentString contains one line per
#   fragment, delimited by "\n"; each line lists the atom numbers of a fragment in
#   ascending order separated by a space. Fragments are ordered by decreasing
#   size; fragments of equal size are ordered by their discovery order (ascending
#   fragment number), which keeps the output stable across runs.
#
sub GetCmpdFragments {
  my($CmpdLines) = @_;
  my($AtomCount, $BondCount, $FirstAtomNum, $SecondAtomNum, $BondType, $LineIndex, $AtomNum, $NbrAtomNum, $ProcessAtomNum, $ProcessedAtomCount, $FragmentNum, $FragmentCount, $FragmentString, @AtomConnections, @ProcessedAtoms, @ProcessingAtoms, @ConnectedAtoms, %Fragments);

  # Setup the connection table for each atom...
  @AtomConnections = ();
  ($AtomCount, $BondCount) = ParseCmpdCountsLine($CmpdLines->[3]);
  for $AtomNum (1 .. $AtomCount) {
    $AtomConnections[$AtomNum] = {};
  }
  for ($LineIndex = 4 + $AtomCount; $LineIndex < (4 + $AtomCount + $BondCount); $LineIndex++) {
    ($FirstAtomNum, $SecondAtomNum, $BondType) = ParseCmpdBondLine($CmpdLines->[$LineIndex]);
    # Record the bond in both directions; the first bond type seen for a pair wins...
    if (!$AtomConnections[$FirstAtomNum]{$SecondAtomNum}) {
      $AtomConnections[$FirstAtomNum]{$SecondAtomNum} = $BondType;
    }
    if (!$AtomConnections[$SecondAtomNum]{$FirstAtomNum}) {
      $AtomConnections[$SecondAtomNum]{$FirstAtomNum} = $BondType;
    }
  }

  # Get set to count fragments...
  $ProcessedAtomCount = 0;
  $FragmentNum = 0;
  %Fragments = ();
  @ProcessedAtoms = ();
  for $AtomNum (1 .. $AtomCount) {
    $ProcessedAtoms[$AtomNum] = 0;
  }

  while ($ProcessedAtomCount < $AtomCount) {
    # Start a new fragment at the lowest numbered atom not assigned to any fragment...
    @ProcessingAtoms = ();
    ATOMNUM: for $AtomNum (1 .. $AtomCount) {
      if (!$ProcessedAtoms[$AtomNum]) {
        $ProcessedAtomCount++;
        $ProcessedAtoms[$AtomNum] = 1;
        push @ProcessingAtoms, $AtomNum;
        $FragmentNum++;
        $Fragments{$FragmentNum} = [$AtomNum];
        last ATOMNUM;
      }
    }

    # Go over the neighbors and follow the connection trail, one layer of
    # neighbors at a time, while collecting the atom numbers present in the
    # connected fragment...
    while (@ProcessingAtoms) {
      @ConnectedAtoms = ();
      for $ProcessAtomNum (@ProcessingAtoms) {
        for $NbrAtomNum (keys %{$AtomConnections[$ProcessAtomNum]}) {
          if (!$ProcessedAtoms[$NbrAtomNum]) {
            $ProcessedAtomCount++;
            $ProcessedAtoms[$NbrAtomNum] = 1;
            push @ConnectedAtoms, $NbrAtomNum;
            push @{ $Fragments{$FragmentNum} }, $NbrAtomNum;
          }
        }
      }
      @ProcessingAtoms = @ConnectedAtoms;
    }
  }
  $FragmentCount = $FragmentNum;

  # Sort the fragments by decreasing size, using fragment number as a tie
  # breaker because hash key order is not stable; atoms within a fragment are
  # sorted by their numbers...
  $FragmentString = join "\n",
    map { join " ", sort { $a <=> $b } @{ $Fragments{$_} } }
    sort { @{$Fragments{$b}} <=> @{$Fragments{$a}} || $a <=> $b } keys %Fragments;

  return ($FragmentCount, $FragmentString);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
538
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
539 # Count the number of lines present between the 4th line and the line containing "M END"
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Count the Ctab atom/bond lines following the counts line of a compound.
#
# Parameters:
#   $CmpdLines - reference to an array holding the lines of one compound record.
#
# Returns:
#   Number of lines between index 4 and the first subsequent line which doesn't
#   start with a digit or a space; 0 when no such terminating line is found.
#
sub GetCtabLinesCount {
  my($CmpdLines) = @_;

  for my $Index (4 .. $#{$CmpdLines}) {
    # Any line after atom and bond data starting with anything other than space
    # or a digit indicates end of Ctab atom/bond data block...
    return $Index - 4 if $CmpdLines->[$Index] !~ /^[0-9 ]/;
  }
  return 0;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
557
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
558 # Using atom blocks, count the number of atoms which contain special element
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
559 # symbols not present in the periodic table.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Using the atom block, collect the atoms whose symbol is rejected by IsElement.
#
# Parameters:
#   $CmpdLines - reference to an array holding the lines of one compound record;
#                the counts line is read from index 3 and atom lines start at
#                index 4.
#
# Returns:
#   ($UnknownAtomCount, $UnknownAtoms, $UnknownAtomLines): number of such atoms,
#   their symbols as a string with each symbol preceded by a space, and the
#   corresponding atom lines joined by "\n".
#
# NOTE(review): IsElement is defined outside this block; presumably it checks the
# symbol against the periodic table - confirm.
#
sub GetUnknownAtoms {
  my($CmpdLines) = @_;
  my($AtomCount) = ParseCmpdCountsLine(@{$CmpdLines}[3]);

  my $UnknownCount = 0;
  my $UnknownSymbols = "";
  my $UnknownLines = "";

  my $EndIndex = 4 + $AtomCount;
  ATOMLINE: for (my $Index = 4; $Index < $EndIndex; $Index++) {
    my($Symbol) = ParseCmpdAtomLine(@{$CmpdLines}[$Index]);
    next ATOMLINE if IsElement($Symbol);

    $UnknownCount++;
    $UnknownSymbols .= " $Symbol";

    # Atom lines are delimited by a new line character...
    $UnknownLines = $UnknownLines ? $UnknownLines . "\n" . $CmpdLines->[$Index] : $CmpdLines->[$Index];
  }
  return ($UnknownCount, $UnknownSymbols, $UnknownLines);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
583
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
584 # Check z coordinates of all atoms to see whether any of them is non-zero
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
585 # which makes the compound geometry three dimensional...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
586 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Check whether the compound geometry is three dimensional.
#
# Parameters:
#   $CmpdLines - reference to an array holding the lines of one compound record;
#                the counts line is read from index 3 and atom lines start at
#                index 4.
#
# Returns:
#   1 as soon as an atom line with a non-zero z coordinate is found; otherwise 0.
#
sub IsCmpd3D {
  my($CmpdLines) = @_;
  my($AtomCount) = ParseCmpdCountsLine(@{$CmpdLines}[3]);

  my $EndIndex = 4 + $AtomCount;
  for (my $Index = 4; $Index < $EndIndex; $Index++) {
    # Only the fourth value returned for an atom line, the z coordinate, matters...
    my(undef, undef, undef, $ZCoord) = ParseCmpdAtomLine(@{$CmpdLines}[$Index]);
    return 1 if $ZCoord != 0;
  }
  return 0;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
600
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
601 # Check whether it's a 2D compound...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
602 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Check whether it's a 2D compound, i.e. IsCmpd3D doesn't report it as 3D.
#
# Parameters:
#   $CmpdLines - reference to an array holding the lines of one compound record.
#
# Returns:
#   0 when IsCmpd3D returns a true value for the compound; otherwise 1.
#
sub IsCmpd2D {
  my($CmpdLines) = @_;

  if (IsCmpd3D($CmpdLines)) {
    return 0;
  }
  return 1;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
608
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
609 # Using bond blocks, count the number of bond lines which contain atom numbers
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
610 # greater than atom count specified in compound count line...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
611 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Using the bond block and the atom property lines, collect the atom numbers
# which are greater than the atom count specified in the compound counts line.
#
# Parameters:
#   $CmpdLines - reference to an array holding the lines of one compound record;
#                the counts line is read from index 3.
#
# Returns:
#   ($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines):
#   number of invalid atom numbers found, the numbers as a string with each
#   number preceded by a space, and the offending lines joined by "\n".
#
# Notes:
#   . Property lines are scanned up to the "M  END" line; only charge (CHG),
#     radical (RAD), isotope (ISO) and atom alias (A) lines are checked.
#   . The "M" and "A" prefixes are matched with one or more whitespace characters
#     after them, so the two-space form used by CTfile property lines
#     ("M  END", "M  CHG", ...) is recognized.
#
sub GetInvalidAtomNumbers {
  my($CmpdLines) = @_;
  my($LineIndex, $AtomCount, $BondCount, $FirstAtomNum, $SecondAtomNum, $InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines, $Line, $InvalidAtomPropertyLine, $ValuePairIndex, $AtomNum, @ValuePairs);

  ($AtomCount, $BondCount) = ParseCmpdCountsLine($CmpdLines->[3]);

  $InvalidAtomNumbersCount = 0;
  $InvalidAtomNumbers = "";
  $InvalidAtomNumberLines = "";

  # Go over bond block lines...
  BONDLINE: for ($LineIndex = 4 + $AtomCount; $LineIndex < (4 + $AtomCount + $BondCount); $LineIndex++) {
    ($FirstAtomNum, $SecondAtomNum) = ParseCmpdBondLine($CmpdLines->[$LineIndex]);
    if ($FirstAtomNum <= $AtomCount && $SecondAtomNum <= $AtomCount) {
      next BONDLINE;
    }
    if ($FirstAtomNum > $AtomCount) {
      $InvalidAtomNumbersCount++;
      $InvalidAtomNumbers .= " $FirstAtomNum";
    }
    if ($SecondAtomNum > $AtomCount) {
      $InvalidAtomNumbersCount++;
      $InvalidAtomNumbers .= " $SecondAtomNum";
    }
    if ($InvalidAtomNumberLines) {
      $InvalidAtomNumberLines .= "\n" . $CmpdLines->[$LineIndex];
    }
    else {
      $InvalidAtomNumberLines = $CmpdLines->[$LineIndex];
    }
  }

  # Go over property lines before M  END...
  #
  PROPERTYLINE: for ($LineIndex = (4 + $AtomCount + $BondCount); $LineIndex < @$CmpdLines; $LineIndex++) {
    $Line = $CmpdLines->[$LineIndex];
    if ($Line =~ /^M\s+END/i) {
      last PROPERTYLINE;
    }

    @ValuePairs = ();
    if ($Line =~ /^M\s+CHG/i) {
      @ValuePairs = ParseCmpdChargePropertyLine($Line);
    }
    elsif ($Line =~ /^M\s+RAD/i) {
      @ValuePairs = ParseCmpdRadicalPropertyLine($Line);
    }
    elsif ($Line =~ /^M\s+ISO/i) {
      @ValuePairs = ParseCmpdIsotopePropertyLine($Line);
    }
    elsif ($Line =~ /^A\s+/i) {
      # Atom alias property spans two lines; consume the following line as well...
      my($NextLine);
      $LineIndex++;
      $NextLine = $CmpdLines->[$LineIndex];
      @ValuePairs = ParseCmpdAtomAliasPropertyLine($Line, $NextLine);
    }
    else {
      next PROPERTYLINE;
    }

    # Values are returned as a flat list of atom number and value pairs; only
    # the atom numbers need to be checked...
    $InvalidAtomPropertyLine = 0;
    for ($ValuePairIndex = 0; $ValuePairIndex < $#ValuePairs; $ValuePairIndex += 2) {
      $AtomNum = $ValuePairs[$ValuePairIndex];
      if ($AtomNum > $AtomCount) {
        $InvalidAtomPropertyLine = 1;
        $InvalidAtomNumbersCount++;
        $InvalidAtomNumbers .= " $AtomNum";
      }
    }
    if ($InvalidAtomPropertyLine) {
      if ($InvalidAtomNumberLines) {
        $InvalidAtomNumberLines .= "\n" . $Line;
      }
      else {
        $InvalidAtomNumberLines = $Line;
      }
    }
  }

  return ($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
692
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
693 # Ctab lines: Atom block
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
694 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
695 # Format: xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz aaaddcccssshhhbbbvvvHHHrrriiimmmnnneee
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
696 # A10 A10 A10 xA3 A2A3 A3 A3 A3 A3 A3 A3 A3 A3 A3 A3
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
697 # x,y,z: Atom coordinates
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
698 # aaa: Atom symbol. Entry in periodic table or L for atom list, A, Q, * for unspecified
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
699 # atom, and LP for lone pair, or R# for Rgroup label
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
700 # dd: Mass difference. -3, -2, -1, 0, 1, 2, 3, 4 (0 for value beyond these limits)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
701 # ccc: Charge. 0 = uncharged or value other than these, 1 = +3, 2 = +2, 3 = +1,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
702 # 4 = doublet radical, 5 = -1, 6 = -2, 7 = -3
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
703 # sss: Atom stereo parity. 0 = not stereo, 1 = odd, 2 = even, 3 = either or unmarked stereo center
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
704 # hhh: Hydrogen count + 1. 1 = H0, 2 = H1, 3 = H2, 4 = H3, 5 = H4
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
705 # bbb: Stereo care box. 0 = ignore stereo configuration of this double bond atom, 1 = stereo
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
706 # configuration of double bond atom must match
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
707 # vvv: Valence. 0 = no marking (default), (1 to 14) = (1 to 14), 15 = zero valence
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
708 # HHH: H0 designator. 0 = not specified, 1 = no H atoms allowed (redundant due to hhh)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
709 # rrr: Not used
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
710 # iii: Not used
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
711 # mmm: Atom-atom mapping number. 1 - number of atoms
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
712 # nnn: Inversion/retention flag. 0 = property not applied, 1 = configuration is inverted,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
713 # 2 = configuration is retained.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
714 # eee: Exact change flag. 0 = property not applied, 1 = change on atom must be
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
715 # exactly as shown
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
716 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
717 # Notes:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
718 # . StereoParity: 1 - ClockwiseStereo, 2 - AntiClockwiseStereo; 3 - Either; 0 - none. These
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
719 # values determine chirality around the chiral center; a nonzero value indicates atom
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
720 # has been marked as chiral center.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
721 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Parse one atom block line of a connection table by fixed column position.
#
# Parameters:
#   $Line - atom block line text.
#
# Returns the list:
#   ($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity)
#
# Every returned field is a string exactly as produced by unpack "A" templates:
# trailing padding is removed, leading spaces are retained. Fields that are not
# present on the line are returned as empty strings.
#
sub ParseCmpdAtomLine {
  my($Line) = @_;
  my($AtomX, $AtomY, $AtomZ, $AtomSymbol, $MassDifference, $Charge, $StereoParity);

  ($AtomX, $AtomY, $AtomZ, $AtomSymbol, $MassDifference, $Charge, $StereoParity) = ('') x 7;
  if (length($Line) > 31) {
    # Three 10 column coordinates, one skipped column, then symbol, mass
    # difference, charge and stereo parity.
    ($AtomX, $AtomY, $AtomZ, $AtomSymbol, $MassDifference, $Charge, $StereoParity) = unpack("A10A10A10xA3A2A3A3", $Line);
  }
  else {
    # The line is too short to hold an atom symbol: only coordinates are read.
    # Assign exactly three targets so that $AtomSymbol keeps its empty string
    # default instead of being overwritten with undef by the list assignment.
    ($AtomX, $AtomY, $AtomZ) = unpack("A10A10A10", $Line);
  }
  return ($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
735
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
736 # Map MDL charge value used in SD and MOL files to internal charge used by MayaChemTools.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
737 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Convert an MDL atom block charge code into the internal formal charge.
#
# Parameters:
#   $MDLCharge - MDL charge code; compared numerically.
#
# Returns the internal charge: codes 1, 2, 3 give +3, +2, +1; codes 5, 6, 7
# give -1, -2, -3; code 0 gives 0. Code 4 (doublet radical) quietly gives 0 and
# any other code gives 0 after a warning is issued through carp.
#
sub MDLChargeToInternalCharge {
  my($MDLCharge) = @_;

  # [MDL code, internal charge] pairs, checked in this order.
  my(@CodeToCharge) = ([0, 0], [1, 3], [2, 2], [3, 1], [5, -1], [6, -2], [7, -3]);

  for my $Pair (@CodeToCharge) {
    my($Code, $Charge) = @{$Pair};
    return $Charge if ($MDLCharge == $Code);
  }

  # No table entry matched: the result is 0, and only code 4 is exempt from
  # the warning.
  if ($MDLCharge != 4) {
    carp "Warning: MDLChargeToInternalCharge: MDL charge value, $MDLCharge, is not supported: An internal charge value, 0, has been assigned...";
  }
  return 0;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
759
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
760 # Map internal charge used by MayaChemTools to MDL charge value used in SD and MOL files.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
761 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Convert an internal formal charge into the MDL atom block charge code.
#
# Parameters:
#   $InternalCharge - internal charge; compared numerically.
#
# Returns the MDL code: +3, +2, +1 give 1, 2, 3; -1, -2, -3 give 5, 6, 7.
# Every other internal charge, including 0, gives MDL code 0 and no warning
# is issued.
#
sub InternalChargeToMDLCharge {
  my($InternalCharge) = @_;

  # [internal charge, MDL code] pairs, checked in this order.
  my(@ChargeToCode) = ([3, 1], [2, 2], [1, 3], [-1, 5], [-2, 6], [-3, 7]);

  for my $Pair (@ChargeToCode) {
    my($Charge, $Code) = @{$Pair};
    return $Code if ($InternalCharge == $Charge);
  }

  # Charges outside the -3 to +3 range, and 0 itself, have no dedicated code.
  return 0;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
779
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
780 # Ctab lines: Bond block
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
781 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
782 # Format: 111222tttsssxxxrrrccc
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
783 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
784 # 111: First atom number.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
785 # 222: Second atom number.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
786 # ttt: Bond type. 1 = Single, 2 = Double, 3 = Triple, 4 = Aromatic, 5 = Single or Double,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
787 # 6 = Single or Aromatic, 7 = Double or Aromatic, 8 = Any
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
788 # sss: Bond stereo. Single bonds: 0 = not stereo, 1 = Up, 4 = Either, 6 = Down,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
789 # Double bonds: 0 = Use x-, y-, z-coords from atom block to determine cis or trans,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
790 # 3 = Cis or trans (either) double bond
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
791 # xxx: Not used
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
792 # rrr: Bond topology. 0 = Either, 1 = Ring, 2 = Chain
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
793 # ccc: Reacting center status. 0 = unmarked, 1 = a center, -1 = not a center,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
794 # Additional: 2 = no change,4 = bond made/broken, 8 = bond order changes 12 = 4+8
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
795 # (both made/broken and changes); 5 = (4 + 1), 9 = (8 + 1), and 13 = (12 + 1) are also possible
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
796 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Parse one bond block line of a connection table.
#
# Parameters:
#   $Line - bond block line text.
#
# Returns ($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo): the first
# four 3 column fields of the line with every space character removed.
#
sub ParseCmpdBondLine {
  my($Line) = @_;
  my($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo) = unpack("A3A3A3A3", $Line);

  # The loop variable aliases each scalar, so the spaces are deleted in place.
  for my $Field ($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo) {
    $Field =~ tr/ //d;
  }

  return ($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
804
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
805 # Map MDL bond type value used in SD and MOL files to internal bond order and bond types
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
806 # values used by MayaChemTools...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
807 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Convert an MDL bond type value into an internal bond order and bond type.
#
# Parameters:
#   $MDLBondType - MDL bond type value; compared numerically.
#
# Returns ($InternalBondOrder, $InternalBondType):
#   1 => (1,   'Single')            5 => (1, 'SingleOrDouble')
#   2 => (2,   'Double')            6 => (1, 'SingleOrAromatic')
#   3 => (3,   'Triple')            7 => (2, 'DoubleOrAromatic')
#   4 => (1.5, 'Aromatic')          8 => (1, 'Any')
#
# Any other value gives (1, 'Single') and a warning is issued through carp.
#
sub MDLBondTypeToInternalBondOrder {
  my($MDLBondType) = @_;
  my($InternalBondOrder, $InternalBondType);

  $InternalBondType = '';

  BONDTYPE: {
    if ($MDLBondType == 1) { $InternalBondOrder = 1; $InternalBondType = 'Single'; last BONDTYPE;}
    if ($MDLBondType == 2) { $InternalBondOrder = 2; $InternalBondType = 'Double'; last BONDTYPE;}
    if ($MDLBondType == 3) { $InternalBondOrder = 3; $InternalBondType = 'Triple'; last BONDTYPE;}

    # An internal bond order of 1.5 is used to mark aromatic bonds.
    if ($MDLBondType == 4) { $InternalBondOrder = 1.5; $InternalBondType = 'Aromatic'; last BONDTYPE;}

    # Query bond types: the bond order is the lowest order the query allows and
    # the bond type string records which query type it was.
    if ($MDLBondType == 5) { $InternalBondOrder = 1; $InternalBondType = 'SingleOrDouble'; last BONDTYPE;}
    if ($MDLBondType == 6) { $InternalBondOrder = 1; $InternalBondType = 'SingleOrAromatic'; last BONDTYPE;}
    if ($MDLBondType == 7) { $InternalBondOrder = 2; $InternalBondType = 'DoubleOrAromatic'; last BONDTYPE;}
    if ($MDLBondType == 8) { $InternalBondOrder = 1; $InternalBondType = 'Any'; last BONDTYPE;}

    # Unrecognized value: fall back to a single bond and say so. The message
    # reports the bond order that is actually assigned.
    $InternalBondOrder = 1;
    $InternalBondType = 'Single';

    carp "Warning: MDLBondTypeToInternalBondOrder: MDL bond type value, $MDLBondType, is not supported: An internal bond order value, 1, has been assigned...";
  }
  return ($InternalBondOrder, $InternalBondType);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
838
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
839 # Map internal bond order and bond type values used by MayaChemTools to MDL bond type value used
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
840 # in SD and MOL files...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
841 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Convert an internal bond order and bond type into an MDL bond type value.
#
# Parameters:
#   $InternalBondOrder - internal bond order; compared numerically.
#   $InternalBondType  - internal bond type name; matched case-insensitively.
#                        A missing value is treated as an empty string.
#
# Returns the MDL bond type:
#   order 1   => 5 (SingleOrDouble), 6 (SingleOrAromatic), 8 (Any), else 1
#   order 2   => 7 (DoubleOrAromatic), else 2
#   order 3   => 3
#   order 1.5 => 4
#   any other order => 8 when the type is Any; otherwise 1 and a warning is
#   issued through carp.
#
sub InternalBondOrderToMDLBondType {
  my($InternalBondOrder, $InternalBondType) = @_;
  my($MDLBondType);

  # Keep the pattern matches below quiet when no bond type is supplied.
  $InternalBondType = '' unless defined $InternalBondType;

  BONDTYPE: {
    if ($InternalBondOrder == 1) {
      if ($InternalBondType =~ /^SingleOrDouble$/i) {
        $MDLBondType = 5;
      }
      elsif ($InternalBondType =~ /^SingleOrAromatic$/i) {
        $MDLBondType = 6;
      }
      elsif ($InternalBondType =~ /^Any$/i) {
        $MDLBondType = 8;
      }
      else {
        $MDLBondType = 1;
      }
      # The value selected above must be kept as is: resetting it to 1 here
      # would make the query bond types 5, 6 and 8 unreachable.
      last BONDTYPE;
    }
    if ($InternalBondOrder == 2) {
      if ($InternalBondType =~ /^DoubleOrAromatic$/i) {
        $MDLBondType = 7;
      }
      else {
        $MDLBondType = 2;
      }
      last BONDTYPE;
    }
    if ($InternalBondOrder == 3) { $MDLBondType = 3; last BONDTYPE;}
    if ($InternalBondOrder == 1.5) { $MDLBondType = 4; last BONDTYPE;}
    if ($InternalBondType =~ /^Any$/i) { $MDLBondType = 8; last BONDTYPE;}

    $MDLBondType = 1;

    carp "Warning: InternalBondOrderToMDLBondType: Internal bond order and type values, $InternalBondOrder and $InternalBondType, don't match any valid MDL bond type: MDL bond type value, 1, has been assigned...";
  }
  return $MDLBondType;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
882
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
883 # Third line: Comments - A blank line is also allowed.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Parse the comments line of a compound record.
#
# Parameters:
#   $Line - comments line text; may be blank.
#
# Returns the comments string: at most the first 80 characters of the line as
# extracted by the unpack "A80" template, which drops trailing padding.
#
sub ParseCmpdCommentsLine {
  my($Line) = @_;

  # List assignment takes the single value produced by the template.
  my($Comments) = unpack("A80", $Line);

  return $Comments;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
892
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
893 # Map MDL bond stereo value used in SD and MOL files to internal bond stereochemistry values used by MayaChemTools...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
894 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Convert an MDL bond stereo value into the internal bond stereochemistry name.
#
# Parameters:
#   $MDLBondStereo - MDL bond stereo value; compared numerically.
#
# Returns 'Up' (1), 'UpOrDown' (4), 'Down' (6), 'CisOrTrans' (3) or 'None' (0).
# Any other value gives an empty string and a warning is issued through carp.
#
sub MDLBondStereoToInternalBondStereochemistry {
  my($MDLBondStereo) = @_;
  my($InternalBondStereo);

  $InternalBondStereo = '';

  BONDSTEREO: {
    if ($MDLBondStereo == 1) { $InternalBondStereo = 'Up'; last BONDSTEREO;}
    if ($MDLBondStereo == 4) { $InternalBondStereo = 'UpOrDown'; last BONDSTEREO;}
    if ($MDLBondStereo == 6) { $InternalBondStereo = 'Down'; last BONDSTEREO;}
    if ($MDLBondStereo == 3) { $InternalBondStereo = 'CisOrTrans'; last BONDSTEREO;}
    if ($MDLBondStereo == 0) { $InternalBondStereo = 'None'; last BONDSTEREO;}

    # Unsupported value: report it under this function's actual name so the
    # warning can be traced back to its source.
    $InternalBondStereo = '';
    carp "Warning: MDLBondStereoToInternalBondStereochemistry: MDL bond stereo value, $MDLBondStereo, is not supported: It has been ignored and bond order would be used to determine bond type...";
  }
  return $InternalBondStereo;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
913
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
914 # Map internal bond stereochemistry values used by MayaChemTools to MDL bond stereo value used in SD and MOL files...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
915 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Convert an internal bond stereochemistry name into the MDL bond stereo value.
#
# Parameters:
#   $InternalBondStereo - internal bond stereochemistry name; matched
#                         case-insensitively for every supported name.
#
# Returns 1 (Up), 4 (UpOrDown), 6 (Down) or 3 (CisOrTrans). Every other name
# gives 0 and no warning is issued.
#
sub InternalBondStereochemistryToMDLBondStereo {
  my($InternalBondStereo) = @_;
  my($MDLBondStereo);

  $MDLBondStereo = 0;

  BONDSTEREO: {
    # All four names use the same case-insensitive matching, so that 'down'
    # and 'cisortrans' are handled the same way as 'up' and 'upordown'.
    if ($InternalBondStereo =~ /^Up$/i) { $MDLBondStereo = 1; last BONDSTEREO;}
    if ($InternalBondStereo =~ /^UpOrDown$/i) { $MDLBondStereo = 4; last BONDSTEREO;}
    if ($InternalBondStereo =~ /^Down$/i) { $MDLBondStereo = 6; last BONDSTEREO;}
    if ($InternalBondStereo =~ /^CisOrTrans$/i) { $MDLBondStereo = 3; last BONDSTEREO;}

    $MDLBondStereo = 0;
  }
  return $MDLBondStereo;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
932
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
933 # Fourth line: Counts
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
934 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
935 # Format: aaabbblllfffcccsssxxxrrrpppiiimmmvvvvvv
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
936 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
937 # aaa: number of atoms; bbb: number of bonds; lll: number of atom lists; fff: (obsolete)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
938 # ccc: chiral flag: 0=not chiral, 1=chiral; sss: number of stext entries; xxx,rrr,ppp,iii:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
939 # (obsolete); mmm: number of lines of additional properties, including the M END line, No
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
940 # longer supported, default is set to 999; vvvvvv: version
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
941
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Parse the counts line of a connection table.
#
# Parameters:
#   $Line - counts line text.
#
# Returns ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version).
# Values read from the line are the raw 3 column (6 column for the version)
# fields as produced by unpack. Fields that a short line cannot contain get
# defaults: chiral flag "0", property count "999" and version "v2000".
#
# Dies through croak when the version field contains V3000 (any case).
#
sub ParseCmpdCountsLine {
  my($Line) = @_;
  my($AtomCount, $BondCount);

  # Start from the defaults; the branches below overwrite whatever the line
  # is long enough to supply.
  my($ChiralFlag, $PropertyCount, $Version) = ("0", "999", "v2000");

  my($LineLength) = length($Line);
  if ($LineLength >= 39) {
    # Complete counts line: every returned field comes from the line.
    ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version) = unpack("A3A3x3x3A3x3x3x3x3x3A3A6", $Line);
  }
  elsif ($LineLength >= 15) {
    # Long enough to reach the chiral flag, but not the trailing fields.
    ($AtomCount, $BondCount, $ChiralFlag) = unpack("A3A3x3x3A3", $Line);
  }
  else {
    # Only the atom and bond counts can be read.
    ($AtomCount, $BondCount) = unpack("A3A3", $Line);
  }

  # Stop here for V3000 data rather than leaving the check to each caller.
  if ($Version =~ /V3000/i) {
    croak "Error: SDFileUtil::ParseCmpdCountsLine: The Extended Connection Table (V3000) format in MDL MOL and SD files is not supported by the current release of MayaChemTools...";
  }

  return ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
967
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
968 # Second line: Misc info
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
969 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
970 # Format: IIPPPPPPPPMMDDYYHHmmddSSssssssssssEEEEEEEEEEEERRRRRR
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
971 # A2A8 A10 A2I2A10 A12 A6
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
972 # User's first and last initials (I), program name (P), date/time (M/D/Y,H:m),
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
973 # dimensional codes - 2D or 3D (d),scaling factors (S, s), energy (E) if modeling program input,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
974 # internal registry number (R) if input through MDL form. A blank line is also allowed.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Parse the misc info line of a compound record by fixed column position.
#
# Parameters:
#   $Line - misc info line text; may be blank.
#
# Returns ($UserInitial, $ProgramName, $Date, $Code, $ScalingFactor1,
# $ScalingFactor2, $Energy, $RegistryNum), taken from fields that are 2, 8,
# 10, 2, 2, 10, 12 and 6 columns wide. A field that the line is too short to
# contain comes back as an empty string.
#
sub ParseCmpdMiscInfoLine {
  my($Line) = @_;

  my($Initials, $Program, $DateTime, $DimensionCode, $Scale1, $Scale2, $EnergyValue, $RegistryID) = unpack("A2A8A10A2A2A10A12A6", $Line);

  # An explicit list of scalars is returned on purpose: it keeps the value
  # seen by a scalar-context caller equal to the last field.
  return ($Initials, $Program, $DateTime, $DimensionCode, $Scale1, $Scale2, $EnergyValue, $RegistryID);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
982
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
983 # First line: Molecule name. This line is unformatted, but like all other lines in a
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
984 # molfile may not extend beyond column 80. A blank line is also allowed.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Parse the molecule name line of a compound record.
#
# Parameters:
#   $Line - molecule name line text; may be blank.
#
# Returns the molecule name: at most the first 80 characters of the line as
# extracted by the unpack "A80" template, which drops trailing padding.
#
sub ParseCmpdMolNameLine {
  my($Line) = @_;

  # List assignment takes the single value produced by the template.
  my($MolName) = unpack("A80", $Line);

  return $MolName;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
993
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
994 # Parse atom alias property line in CTAB generic properties block.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
995 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
996 # Atom alias property line format:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
997 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
998 # A aaa
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
999 # x...
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1000 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1001 # aaa: Atom number
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1002 # x: Atom alias in next line
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1003 #
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Parse an atom alias property line together with the line that follows it.
#
# Parameters:
#   $Line     - atom alias property line: a label followed by the atom number,
#               separated by whitespace.
#   $NextLine - the following line, which holds the alias text.
#
# Returns ($AtomNumber, $AtomAlias). $AtomAlias is $NextLine unchanged. A
# warning is issued through carp when the alias is undefined or empty; the
# values are returned regardless.
#
sub ParseCmpdAtomAliasPropertyLine {
  my($Line, $NextLine) = @_;
  my($AtomNumber, $AtomAlias);

  # The leading label is not needed; only the atom number is kept.
  (undef, $AtomNumber) = split(' ', $Line);
  $AtomAlias = $NextLine;

  # Test for presence rather than truth so that an alias of "0" is accepted.
  if (!defined($AtomAlias) || !length($AtomAlias)) {
    carp "Warning: ParseCmpdAtomAliasPropertyLine: No atom alias value specified on the line following atom alias property line...";
  }

  return ($AtomNumber, $AtomAlias);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1017
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Parse charge property line in CTAB generic properties block.
#
# Charge property line format:
#
# M  CHGnn8 aaa vvv ...
#
# nn8: Number of value pairs. Maximum of 8 pairs allowed.
# aaa: Atom number
# vvv: -15 to +15. Default of 0 = uncharged atom. When present, this property supersedes
#      all charge and radical values in the atom block, forcing a 0 charge on all atoms not
#      listed in an M  CHG or M  RAD line.
#
# The line is handed over to _ParseCmpdGenericPropertyLine and whatever it
# returns is passed back to the caller unchanged.
#
sub ParseCmpdChargePropertyLine {
  my $Line = shift;
  my $PropertyName = 'Charge';

  return _ParseCmpdGenericPropertyLine($PropertyName, $Line);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1035
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1036
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Parse isotope property line in CTAB generic properties block.
#
# Isotope property line format:
#
# M  ISOnn8 aaa vvv ...
#
# nn8: Number of value pairs. Maximum of 8 pairs allowed.
# aaa: Atom number
# vvv: Absolute mass of the atom isotope as a positive integer. When present, this property
#      supersedes all isotope values in the atom block. Default (no entry) means natural
#      abundance. The difference between this absolute mass value and the natural
#      abundance value specified in the PTABLE.DAT file must be within the range of -18
#      to +12
#
# Notes:
#   . Values correspond to mass numbers...
#   . The line is handed over to _ParseCmpdGenericPropertyLine and whatever it
#     returns is passed back to the caller unchanged.
#
sub ParseCmpdIsotopePropertyLine {
  my $Line = shift;
  my $PropertyName = 'Isotope';

  return _ParseCmpdGenericPropertyLine($PropertyName, $Line);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1059
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Parse radical property line in CTAB generic properties block.
#
# Radical property line format:
#
# M  RADnn8 aaa vvv ...
#
# nn8: Number of value pairs. Maximum of 8 pairs allowed.
# aaa: Atom number
# vvv: Default of 0 = no radical, 1 = singlet, 2 = doublet, 3 = triplet. When
#      present, this property supersedes all charge and radical values in the atom block,
#      forcing a 0 (zero) charge and radical on all atoms not listed in an M  CHG or
#      M  RAD line.
#
# The line is handed over to _ParseCmpdGenericPropertyLine and whatever it
# returns is passed back to the caller unchanged.
#
sub ParseCmpdRadicalPropertyLine {
  my $Line = shift;
  my $PropertyName = 'Radical';

  return _ParseCmpdGenericPropertyLine($PropertyName, $Line);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1078
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Map MDL radical value used in SD and MOL files to internal spin multiplicity value
# used by MayaChemTools...
#
# MDL radical values numerically equal to 0, 1, 2 or 3 map to the same internal
# value. Any other value triggers a warning through carp and an empty string
# is returned.
#
sub MDLRadicalToInternalSpinMultiplicity {
  my($MDLRadical) = @_;

  # Supported values are compared numerically and returned as is...
  for my $SupportedValue (0, 1, 2, 3) {
    return $SupportedValue if ($MDLRadical == $SupportedValue);
  }

  carp "Warning: MDLRadicalToInternalSpinMultiplicity: MDL radical value, $MDLRadical, specifed on line M RAD is not supported...";

  return '';
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1097
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Map internal spin multiplicity value used by MayaChemTools to MDL radical value
# used in SD and MOL files...
#
# Spin multiplicity values numerically equal to 1, 2 or 3 map to the same MDL
# radical value; everything else maps to 0.
#
sub InternalSpinMultiplicityToMDLRadical {
  my($InternalSpinMultiplicity) = @_;

  for my $Multiplicity (1, 2, 3) {
    return $Multiplicity if ($InternalSpinMultiplicity == $Multiplicity);
  }

  # No radical...
  return 0;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1114
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Process generic CTAB property line...
#
# The line is split on whitespace: the first three fields are taken as the line
# label, the property label and the declared number of value pairs; all the
# remaining fields are returned as a flat list of alternating atom number and
# property values.
#
# A warning is issued through carp when the number of pairs found on the line
# doesn't match the declared count; the values found are returned regardless.
# $PropertyName is used only in the warning message.
#
sub _ParseCmpdGenericPropertyLine {
  my($PropertyName, $Line) = @_;

  my @Fields = split(' ', $Line);

  # Take off the leading fields; whatever is left corresponds to value pairs...
  my($Label, $PropertyLabel, $ValuesCount) = splice(@Fields, 0, 3);
  my $ValuePairsCount = @Fields / 2;

  if ($ValuesCount != $ValuePairsCount) {
    carp "Warning: _ParseCmpdGenericPropertyLine: Number of atom number and $PropertyName value paris specified on $Label $PropertyLabel property line, $ValuePairsCount, does not match expected value of $ValuesCount...";
  }

  return @Fields;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1130
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Generate generic CTAB property lines for charge, isotope and radical properties...
#
# Property line format:
#
# M  XXXnn8 aaa vvv ...
#
# XXX: CHG, ISO or RAD
# nn8: Number of value pairs on the line. Maximum of 8 pairs allowed.
# aaa: Atom number
# vvv: Property value
#
# $PropertyName is Charge, Isotope or Radical (case insensitive) and
# $PropertyValuePairsRef is a reference to a flat array of alternating atom
# number and property values. Returns a list of property lines, each holding no
# more than 8 pairs. An unknown property name triggers a warning through carp
# and an empty list is returned. A trailing value without a partner is ignored.
#
sub _GenerateCmpdGenericPropertyLines {
  my($PropertyName, $PropertyValuePairsRef) = @_;
  my($MaxPairsPerLine, $PropertyLabel, $LastIndex, $Index, $PairsCount, $PairsText, @PropertyLines);

  @PropertyLines = ();
  NAME: {
    if ($PropertyName =~ /^Charge$/i) { $PropertyLabel = "M  CHG"; last NAME; }
    if ($PropertyName =~ /^Isotope$/i) { $PropertyLabel = "M  ISO"; last NAME; }
    if ($PropertyName =~ /^Radical$/i) { $PropertyLabel = "M  RAD"; last NAME; }
    carp "Warning: _GenerateCmpdGenericPropertyLines: Unknown property name, $PropertyName, specified...";
    return @PropertyLines;
  }

  # A maximum of 8 property pair values allowed per line...
  $MaxPairsPerLine = 8;

  $PairsCount = 0;
  $PairsText = '';
  $LastIndex = $#{$PropertyValuePairsRef};

  for ($Index = 0; $Index < $LastIndex; $Index += 2) {
    $PairsCount++;
    $PairsText .= sprintf " %3i %3i", $PropertyValuePairsRef->[$Index], $PropertyValuePairsRef->[$Index + 1];

    # Write out the line as soon as it's full. The line must be closed at
    # exactly 8 pairs; waiting for the count to exceed 8 would put a ninth
    # pair on a line declaring only 8 pairs...
    if ($PairsCount == $MaxPairsPerLine) {
      push @PropertyLines, sprintf("%s%3i%s", $PropertyLabel, $PairsCount, $PairsText);
      $PairsCount = 0;
      $PairsText = '';
    }
  }

  # Write out any remaining pairs; the count occupies a 3 character wide field...
  if ($PairsCount) {
    push @PropertyLines, sprintf("%s%3i%s", $PropertyLabel, $PairsCount, $PairsText);
  }

  return @PropertyLines;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1169
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
#
# Read compound data into a string and return its value.
#
# Lines are read from the file handle $SDFileRef up to and including the next
# line starting with $$$$. Windows and Mac new line chars are changed to UNIX
# new line char and the new line char at the end of the $$$$ line is dropped.
# Whatever has been read is returned on reaching the end of the file without
# finding a $$$$ line; an empty string is returned when there is nothing left
# to read.
#
sub ReadCmpdString {
  my($SDFileRef) = @_;
  my($CmpdString, $Line);

  $CmpdString = "";

  # Read into a lexical variable instead of $_; assigning to the global $_
  # would clobber the value in use by the caller...
  LINE: while (defined($Line = <$SDFileRef>)) {
    # Change Windows and Mac new line char to UNIX...
    $Line =~ s/\r\n?/\n/g;

    if ($Line =~ /^\$\$\$\$/) {
      # Take out any new line char at the end by explicitly removing it instead of using
      # chomp, which might not always work correctly on files generated on a system
      # with a value of input line separator different from the current system...
      $Line =~ s/\n$//g;

      # Doesn't hurt to chomp...
      chomp $Line;

      $CmpdString .= $Line;
      last LINE;
    }
    $CmpdString .= $Line;
  }
  return $CmpdString;
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1199
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
# Find out the number of fragments in the compound. And for a compound with
# more than one fragment, remove all the others besides the largest one.
#
# $CmpdLines is a reference to an array containing compound lines. Returns the
# list (FragmentCount, Fragments, WashedCmpdString): FragmentCount and Fragments
# are the values returned by GetCmpdFragments; WashedCmpdString holds the
# compound data for the largest fragment joined by new line char and it's an
# empty string for compounds without multiple fragments.
#
# Notes:
#   . The first line in Fragments is taken to be the largest fragment and its
#     atom numbers are assumed to be sorted from lowest to highest.
#   . Property lines for CHG, RAD, ISO and atom alias before M  END line are
#     regenerated using new atom numbers; all other property lines are dropped.
#   . Property tags are matched using any amount of whitespace after the first
#     letter; M  END line is written out using two spaces as laid out in the
#     CTfile format.
#
sub WashCmpd {
  my($CmpdLines) = @_;
  my($WashedCmpdString, $FragmentCount, $Fragments);

  $WashedCmpdString = "";
  ($FragmentCount, $Fragments) = GetCmpdFragments($CmpdLines);

  # Nothing to wash for compounds without multiple fragments...
  if (!($FragmentCount > 1)) {
    return ($FragmentCount, $Fragments, $WashedCmpdString);
  }

  my(@AllFragments, @LargestFragment, %LargestFragmentAtoms, @WashedCmpdLines, $Index, $LineIndex, $AtomCount, $BondCount, $ChiralFlag, $NewAtomCount, $NewBondCount, $FirstBondLineIndex, $FirstPropertyLineIndex, $MENDLineIndex, $AtomNum, $FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo, $BondLine, $Line, $NextLine, $PropertyType, @ValuePairs, @NewValuePairs, $ValuePairIndex, @NewPropertyLines);

  # Map old atom numbers to new atom numbers as the fragment atom numbers are sorted
  # from lowest to highest old atom numbers...
  @AllFragments = split "\n", $Fragments;
  @LargestFragment = split " ", $AllFragments[0];
  %LargestFragmentAtoms = ();
  for $Index (0 .. $#LargestFragment) {
    $LargestFragmentAtoms{$LargestFragment[$Index]} = $Index + 1;
  }

  # Header block lines and counts line; counts line is updated at the end...
  @WashedCmpdLines = @{$CmpdLines}[0 .. 3];
  ($AtomCount, $BondCount, $ChiralFlag) = ParseCmpdCountsLine($CmpdLines->[3]);

  $NewAtomCount = @LargestFragment;
  $NewBondCount = 0;

  $FirstBondLineIndex = 4 + $AtomCount;
  $FirstPropertyLineIndex = $FirstBondLineIndex + $BondCount;

  # Retrieve the largest fragment atom lines; atom lines start at line index 4...
  for $LineIndex (4 .. ($FirstBondLineIndex - 1)) {
    $AtomNum = $LineIndex - 3;
    if ($LargestFragmentAtoms{$AtomNum}) {
      push @WashedCmpdLines, $CmpdLines->[$LineIndex];
    }
  }

  # Retrieve the largest fragment bond lines...
  for $LineIndex ($FirstBondLineIndex .. ($FirstPropertyLineIndex - 1)) {
    ($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo) = ParseCmpdBondLine($CmpdLines->[$LineIndex]);
    if ($LargestFragmentAtoms{$FirstAtomNum} && $LargestFragmentAtoms{$SecondAtomNum}) {
      $NewBondCount++;
      # Set up bond line with new atom number mapping...
      $BondLine = GenerateCmpdBondLine($LargestFragmentAtoms{$FirstAtomNum}, $LargestFragmentAtoms{$SecondAtomNum}, $BondType, $BondStereo);
      push @WashedCmpdLines, $BondLine;
    }
  }

  # Get property lines for CHG, ISO and RAD label and map the old atom numbers to new
  # atom numbers; other property lines before M  END line are skipped as atom numbers for
  # other properties might not be valid anymore...
  #
  $MENDLineIndex = $FirstPropertyLineIndex;
  LINE: for ($LineIndex = $FirstPropertyLineIndex; $LineIndex < @$CmpdLines; $LineIndex++) {
    $Line = $CmpdLines->[$LineIndex];
    if ($Line =~ /^M\s+END/i) {
      push @WashedCmpdLines, "M  END";
      $MENDLineIndex = $LineIndex;
      last LINE;
    }

    # Figure out the property type once; it drives both parsing and regeneration...
    if ($Line =~ /^M\s+CHG/i) {
      $PropertyType = 'Charge';
    }
    elsif ($Line =~ /^M\s+RAD/i) {
      $PropertyType = 'Radical';
    }
    elsif ($Line =~ /^M\s+ISO/i) {
      $PropertyType = 'Isotope';
    }
    elsif ($Line =~ /^A\s/i) {
      $PropertyType = 'AtomAlias';
    }
    else {
      next LINE;
    }

    @ValuePairs = ();
    if ($PropertyType eq 'Charge') {
      @ValuePairs = ParseCmpdChargePropertyLine($Line);
    }
    elsif ($PropertyType eq 'Radical') {
      @ValuePairs = ParseCmpdRadicalPropertyLine($Line);
    }
    elsif ($PropertyType eq 'Isotope') {
      @ValuePairs = ParseCmpdIsotopePropertyLine($Line);
    }
    else {
      # Atom alias value is on the next line, which is consumed here as well...
      $LineIndex++;
      $NextLine = $CmpdLines->[$LineIndex];
      @ValuePairs = ParseCmpdAtomAliasPropertyLine($Line, $NextLine);
    }
    if (!@ValuePairs) {
      next LINE;
    }

    # Collect values for valid atom numbers with mapping to new atom numbers...
    @NewValuePairs = ();
    VALUEINDEX: for ($ValuePairIndex = 0; $ValuePairIndex < $#ValuePairs; $ValuePairIndex += 2) {
      $AtomNum = $ValuePairs[$ValuePairIndex];
      if (!exists $LargestFragmentAtoms{$AtomNum}) {
        next VALUEINDEX;
      }
      push @NewValuePairs, ($LargestFragmentAtoms{$AtomNum}, $ValuePairs[$ValuePairIndex + 1]);
    }
    if (!@NewValuePairs) {
      next LINE;
    }

    @NewPropertyLines = ();
    if ($PropertyType eq 'Charge') {
      @NewPropertyLines = GenerateCmpdChargePropertyLines(\@NewValuePairs);
    }
    elsif ($PropertyType eq 'Radical') {
      @NewPropertyLines = GenerateCmpdRadicalPropertyLines(\@NewValuePairs);
    }
    elsif ($PropertyType eq 'Isotope') {
      @NewPropertyLines = GenerateCmpdIsotopePropertyLines(\@NewValuePairs);
    }
    else {
      @NewPropertyLines = GenerateCmpdAtomAliasPropertyLines(\@NewValuePairs);
    }
    push @WashedCmpdLines, @NewPropertyLines;
  }

  # Retrieve rest of the data label and value property data...
  push @WashedCmpdLines, @{$CmpdLines}[($MENDLineIndex + 1) .. $#{$CmpdLines}];

  # Update atom and bond count line...
  $WashedCmpdLines[3] = GenerateCmpdCountsLine($NewAtomCount, $NewBondCount, $ChiralFlag);

  $WashedCmpdString = join "\n", @WashedCmpdLines;

  return ($FragmentCount, $Fragments, $WashedCmpdString);
}
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1324
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1325 1;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1326
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1327 __END__
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1328
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1329 =head1 NAME
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1330
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1331 SDFileUtil
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1332
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1333 =head1 SYNOPSIS
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1334
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1335 use SDFileUtil ;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1336
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1337 use SDFileUtil qw(:all);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1338
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1339 =head1 DESCRIPTION
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1340
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1341 B<SDFileUtil> module provides the following functions:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1342
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1343 GenerateCmpdAtomAliasPropertyLines, GenerateCmpdAtomLine, GenerateCmpdBondLine,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1344 GenerateCmpdChargePropertyLines, GenerateCmpdCommentsLine, GenerateCmpdCountsLine,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1345 GenerateCmpdDataHeaderLabelsAndValuesLines, GenerateCmpdIsotopePropertyLines,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1346 GenerateCmpdMiscInfoLine, GenerateCmpdMolNameLine,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1347 GenerateCmpdRadicalPropertyLines, GenerateEmptyCtabBlockLines,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1348 GenerateMiscLineDateStamp, GetAllAndCommonCmpdDataHeaderLabels,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1349 GetCmpdDataHeaderLabels, GetCmpdDataHeaderLabelsAndValues, GetCmpdFragments,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1350 GetCtabLinesCount, GetInvalidAtomNumbers, GetUnknownAtoms,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1351 InternalBondOrderToMDLBondType, InternalBondStereochemistryToMDLBondStereo,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1352 InternalChargeToMDLCharge, InternalSpinMultiplicityToMDLRadical, IsCmpd2D,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1353 IsCmpd3D, MDLBondStereoToInternalBondStereochemistry,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1354 MDLBondTypeToInternalBondOrder, MDLChargeToInternalCharge,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1355 MDLRadicalToInternalSpinMultiplicity, ParseCmpdAtomAliasPropertyLine,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1356 ParseCmpdAtomLine, ParseCmpdBondLine, ParseCmpdChargePropertyLine,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1357 ParseCmpdCommentsLine, ParseCmpdCountsLine, ParseCmpdIsotopePropertyLine,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1358 ParseCmpdMiscInfoLine, ParseCmpdMolNameLine, ParseCmpdRadicalPropertyLine,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1359 ReadCmpdString, RemoveCmpdDataHeaderLabelAndValue, WashCmpd
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1360
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1361 =head1 METHODS
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1362
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1363 =over 4
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1364
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1365 =item B<GenerateCmpdAtomAliasPropertyLines>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1366
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1367 @Lines = GenerateCmpdAtomAliasPropertyLines($AliasValuePairsRef);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1368
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1369 Returns formatted atom alias property lines corresponding to successive pairs
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1370 of atom number and alias values specified by a reference to an array. Two lines
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1371 are generated for each atom number and alias value pair: First line - A <AtomNum>;
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1372 Second line:<AtomAlias>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1373
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1374 =item B<GenerateCmpdAtomLine>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1375
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1376 $Line = GenerateCmpdAtomLine($AtomSymbol, $AtomX, $AtomY,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1377 $AtomZ, [$MassDifference, $Charge, $StereoParity]);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1378
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1379 Returns a formatted atom data line containing all the input values.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1380
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1381 =item B<GenerateCmpdBondLine>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1382
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1383 $Line = GenerateCmpdBondLine($FirstAtomNum, $SecondAtomNum,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1384 $BondType, [$BondStereo]);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1385
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1386 Returns a formatted bond data line containing all the input values.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1387
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1388 =item B<GenerateCmpdChargePropertyLines>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1389
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1390 @Lines = GenerateCmpdChargePropertyLines($ChargeValuePairsRef);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1391
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1392 Returns formatted M CHG property lines corresponding to successive pairs of
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1393 atom number and charge values specified by a reference to an array.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1394
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1395 =item B<GenerateCmpdCommentsLine>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1396
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1397 $Line = GenerateCmpdCommentsLine($Comments);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1398
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1399 Returns a formatted comments data line.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1400
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1401 =item B<GenerateCmpdCountsLine>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1402
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1403 $Line = GenerateCmpdCountsLine($AtomCount, $BondCount,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1404 $ChiralFlag, [$PropertyCount, $Version]);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1405
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1406 Returns a formatted line containing all the input values. The default values of 999
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1407 and V2000 are used for I<PropertyCount> and I<Version>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1408
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1409 =item B<GenerateCmpdDataHeaderLabelsAndValuesLines>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1410
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1411 @Lines = GenerateCmpdDataHeaderLabelsAndValuesLines(
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1412 $DataHeaderLabelsRef, $DataHeaderLabelsAndValuesRef,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1413 [$SortDataLabels]);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1414
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1415 Returns formatted data lines containing header label and values lines corresponding to
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1416 all data header labels in array reference I<DataHeaderLabelsRef> with values in hash
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1417 reference I<DataHeaderLabelsAndValuesRef>. By default, data header labels are
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1418 not sorted and correspond to the label order in array reference I<DataHeaderLabelsRef>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1419
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1420 =item B<GenerateCmpdIsotopePropertyLines>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1421
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1422 @Lines = GenerateCmpdIsotopePropertyLines($IsotopeValuePairsRef);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1423
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1424 Returns formatted M ISO property lines corresponding to successive pairs of
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1425 atom number and isotope values specified by a reference to an array.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1426
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1427 =item B<GenerateCmpdMiscInfoLine>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1428
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1429 $Line = GenerateCmpdMiscInfoLine([$ProgramName, $UserInitial,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1430 $Code]);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1431
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1432 Returns a formatted line containing specified user initial, program name, date and code.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1433 Default values are: I<ProgramName - MayaChem; UserInitial - NULL; Code - 2D>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1434
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1435 =item B<GenerateCmpdMolNameLine>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1436
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1437 $Line = GenerateCmpdMolNameLine($MolName);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1438
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1439 Returns a formatted molecule name data line.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1440
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1441 =item B<GenerateCmpdRadicalPropertyLines>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1442
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1443 @Lines = GenerateCmpdRadicalPropertyLines($RadicalValuePairsRef);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1444
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1445 Returns formatted M RAD property lines corresponding to successive pairs of
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1446 atom number and multiplicity values specified by a reference to an array.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1447
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1448 =item B<GenerateEmptyCtabBlockLines>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1449
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1450 $Lines = GenerateEmptyCtabBlockLines([$Date]);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1451
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1452 Returns formatted lines representing empty CTAB block.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1453
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1454 =item B<GenerateMiscLineDateStamp>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1455
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1456 $Line = GenerateMiscLineDateStamp();
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1457
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1458 Returns date stamp for misc line.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1459
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1460 =item B<GetAllAndCommonCmpdDataHeaderLabels>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1461
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1462 ($CmpdCount, $DataFieldLabelsArrayRef,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1463 $CommonDataFieldLabelsArrayRef) =
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1464 GetAllAndCommonCmpdDataHeaderLabels(\*SDFILE);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1465
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1466 Returns number of compounds, a reference to an array containing all unique data header
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1467 labels and a reference to an array containing common data field labels for all compounds
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1468 in SD file.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1469
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1470 =item B<GetCmpdDataHeaderLabels>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1471
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1472 (@Labels) = GetCmpdDataHeaderLabels(\@CmpdLines);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1473
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1474 Returns an array containing data header labels for a compound.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1475
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1476 =item B<GetCmpdDataHeaderLabelsAndValues>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1477
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1478 (%DataValues) = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1479
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1480 Returns a hash containing data header labels and values for a compound.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1481
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1482 =item B<GetCmpdFragments>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1483
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1484 ($FragmentCount, $FragmentString) = GetCmpdFragments(\@CmpdLines);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1485
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1486 Figures out the number of disconnected fragments and returns their values along
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1487 with the atom numbers in a string delimited by new line character. Fragment data
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1488 in B<FragmentString> is sorted based on its size.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1489
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1490 =item B<GetCtabLinesCount>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1491
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1492 $CtabLinesCount = GetCtabLinesCount(\@CmpdLines);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1493
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1494 Returns number of lines present between the 4th line and the line containing "M END".
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1495
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1496 =item B<GetInvalidAtomNumbers>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1497
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1498 ($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines) =
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1499 GetInvalidAtomNumbers(\@CmpdLines);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1500
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1501 Returns a list of values containing information about invalid atom numbers present
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1502 in block or atom property lines.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1503
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1504 =item B<GetUnknownAtoms>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1505
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1506 ($UnknownAtomCount, $UnknownAtoms, $UnknownAtomLines) =
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1507 GetUnknownAtoms(\@CmpdLines);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1508
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1509 Returns a list of values containing information about atoms which contain special element
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1510 symbols not present in the periodic table.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1511
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1512 =item B<InternalBondOrderToMDLBondType>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1513
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1514 $MDLBondType = InternalBondOrderToMDLBondType($InternalBondOrder);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1515
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1516 Returns value of I<MDLBondType> corresponding to I<InternalBondOrder>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1517
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1518 InternalBondOrder MDLBondType
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1519
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1520 1 1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1521 2 2
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1522 3 3
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1523 1.5 4
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1524
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1525 =item B<InternalBondStereochemistryToMDLBondStereo>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1526
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1527 $MDLBondStereo = InternalBondStereochemistryToMDLBondStereo(
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1528 $InternalBondStereo);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1529
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1530 Returns value of I<MDLBondStereo> corresponding to I<InternalBondStereo> using following
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1531 mapping:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1532
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1533 InternalBondStereo MDLBondStereo
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1534
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1535 Up 1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1536 UpOrDown 4
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1537 Down 6
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1538 CisOrTrans 3
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1539 Other 0
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1540
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1541 =item B<InternalChargeToMDLCharge>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1542
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1543 $MDLCharge = InternalChargeToMDLCharge($InternalCharge);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1544
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1545 Returns value of I<MDLCharge> corresponding to I<InternalCharge> using following
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1546 mapping:
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1547
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1548 InternalCharge MDLCharge
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1549
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1550 3 1
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1551 2 2
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1552 1 3
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1553 -1 5
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1554 -2 6
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1555 -3 7
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1556
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1557 =item B<InternalSpinMultiplicityToMDLRadical>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1558
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1559 $MDLRadical = InternalSpinMultiplicityToMDLRadical(
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1560 $InternalSpinMultiplicity);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1561
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1562 Returns value of I<MDLRadical> corresponding to I<InternalSpinMultiplicity>. These
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1563 values are equivalent.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1564
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1565 =item B<MDLBondStereoToInternalBondType>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1566
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1567 $InternalBondType = MDLBondStereoToInternalBondType($MDLBondStereo);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1568
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1569 Returns value of I<InternalBondType> corresponding to I<MDLBondStereo> using
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1570 mapping shown for B<InternalBondTypeToMDLBondStereo> function.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1571
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1572 =item B<IsCmpd2D>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1573
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1574 $Status = IsCmpd2D();
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1575
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1576 Returns 1 or 0 based on whether z-coordinates of all atoms are zero.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1577
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1578 =item B<IsCmpd3D>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1579
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1580 $Status = IsCmpd3D();
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1581
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1582 Returns 1 or 0 based on whether z-coordinate of any atom is non-zero.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1583
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1584 =item B<MDLBondStereoToInternalBondStereochemistry>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1585
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1586 $InternalBondStereo = MDLBondStereoToInternalBondStereochemistry(
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1587 $MDLBondStereo);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1588
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1589 Returns value of I<InternalBondStereo> corresponding to I<MDLBondStereo> using
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1590 mapping shown for B<InternalBondStereochemistryToMDLBondStereo> function.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1591
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1592 =item B<MDLBondTypeToInternalBondOrder>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1593
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1594 $InternalBondOrder = MDLBondTypeToInternalBondOrder($MDLBondType);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1595
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1596 Returns value of I<InternalBondOrder> corresponding to I<MDLBondType> using
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1597 mapping shown for B<InternalBondOrderToMDLBondType> function.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1598
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1599 =item B<MDLChargeToInternalCharge>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1600
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1601 $InternalCharge = MDLChargeToInternalCharge($MDLCharge);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1602
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1603 Returns value of I<InternalCharge> corresponding to I<MDLCharge> using
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1604 mapping shown for B<InternalChargeToMDLCharge> function.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1605
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1606 =item B<MDLRadicalToInternalSpinMultiplicity>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1607
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1608 $InternalSpinMultiplicity = MDLRadicalToInternalSpinMultiplicity(
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1609 $MDLRadical);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1610
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1611 Returns value of I<InternalSpinMultiplicity> corresponding to I<MDLRadical>. These
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1612 values are equivalent.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1613
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1614 =item B<ParseCmpdAtomAliasPropertyLine>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1615
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1616 @AtomNumAndValuePairs = ParseCmpdAtomAliasPropertyLine(
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1617 $CurrentLine, $NextLine);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1618
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1619 Parses atom alias property lines in CTAB generic properties block and returns an array
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1620 with successive pairs of values corresponding to atom number and its alias.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1621
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1622 =item B<ParseCmpdAtomLine>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1623
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1624 ($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1625 $StereoParity) = ParseCmpdAtomLine($AtomDataLine);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1626
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1627 Parses compound data line containing atom information and returns a list
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1628 of values.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1629
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1630 =item B<ParseCmpdBondLine>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1631
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1632 ($FirstAtomNum, $SecondAtomNum, $BondType) =
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1633 ParseCmpdBondLine($BondDataLine);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1634
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1635 Parses compound data line containing bond information and returns a list of
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1636 values.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1637
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1638 =item B<ParseCmpdCommentsLine>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1639
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1640 $Comments = ParseCmpdCommentsLine($CommentsDataLine);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1641
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1642 Returns the comment string.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1643
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1644 =item B<ParseCmpdChargePropertyLine>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1645
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1646 @AtomNumAndValuePairs = ParseCmpdChargePropertyLine(
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1647 $ChargeDataLine);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1648
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1649 Parses charge property line in CTAB generic properties block and returns an array
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1650 with successive pairs of values corresponding to atom number and its charge.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1651
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1652 =item B<ParseCmpdCountsLine>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1653
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1654 ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version) =
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1655 ParseCmpdCountsLine(\@CountDataLines);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1656
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1657 Returns a list of values containing count information.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1658
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1659 =item B<ParseCmpdMiscInfoLine>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1660
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1661 ($UserInitial, $ProgramName, $Date, $Code, $ScalingFactor1, $ScalingFactor2,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1662 $Energy, $RegistryNum) = ParseCmpdMiscInfoLine($Line);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1663
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1664 Returns a list of values containing miscellaneous information.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1665
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1666 =item B<ParseCmpdIsotopePropertyLine>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1667
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1668 @AtomNumAndValuePairs = ParseCmpdIsotopePropertyLine(
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1669 $IsotopeDataLine);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1670
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1671 Parses isotopic property line in CTAB generic properties block and returns an array
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1672 with successive pairs of values corresponding to atom number and absolute mass of
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1673 atom isotope.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1674
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1675 =item B<ParseCmpdMolNameLine>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1676
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1677 $MolName = ParseCmpdMolNameLine($Line);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1678
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1679 Returns a string containing molecule name.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1680
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1681 =item B<ParseCmpdRadicalPropertyLine>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1682
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1683 @AtomNumAndValuePairs = ParseCmpdRadicalPropertyLine(
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1684 $RadicalDataLine);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1685
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1686 Parses radical property line in CTAB generic properties block and returns an array
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1687 with successive pairs of values corresponding to atom number and radical number
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1688 value.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1689
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1690 =item B<RemoveCmpdDataHeaderLabelAndValue>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1691
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1692 $NewCmpdString = RemoveCmpdDataHeaderLabelAndValue($CmpdString,
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1693 $DataHeaderLabel);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1694
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1695 Returns a B<NewCmpdString> after removing I<DataHeaderLabel> along with its
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1696 value from I<CmpdString>.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1697
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1698 =item B<ReadCmpdString>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1699
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1700 $CmpdString = ReadCmpdString(\*SDFILEHANDLE);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1701
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1702 Returns a string containing all the data lines for the next available compound
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1703 in an already open file indicated by SDFILEHANDLE. A NULL string is returned
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1704 on EOF.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1705
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1706 =item B<WashCmpd>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1707
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1708 ($FragmentCount, $Fragments, $WashedCmpdString) =
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1709 WashCmpd(\@CmpdLines);
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1710
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1711 Figures out the number of disconnected fragments and returns their values along
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1712 with the atom numbers in a string delimited by new line character. Fragment data
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1713 in B<FragmentString> is sorted based on its size.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1714
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1715 =back
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1716
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1717 =head1 AUTHOR
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1718
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1719 Manish Sud <msud@san.rr.com>
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1720
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1721 =head1 SEE ALSO
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1722
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1723 TextUtil.pm
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1724
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1725 =head1 COPYRIGHT
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1726
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1727 Copyright (C) 2015 Manish Sud. All rights reserved.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1728
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1729 This file is part of MayaChemTools.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1730
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1731 MayaChemTools is free software; you can redistribute it and/or modify it under
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1732 the terms of the GNU Lesser General Public License as published by the Free
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1733 Software Foundation; either version 3 of the License, or (at your option)
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1734 any later version.
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1735
73ae111cf86f Uploaded
deepakjadmin
parents:
diff changeset
1736 =cut