annotate mayachemtool/mayachemtools/lib/SDFileUtil.pm @ 0:68300206e90d draft default tip

Uploaded
author deepakjadmin
date Thu, 05 Nov 2015 02:41:30 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1 package SDFileUtil;
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
2 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
3 # $RCSfile: SDFileUtil.pm,v $
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
4 # $Date: 2015/02/28 20:47:18 $
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
5 # $Revision: 1.49 $
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
6 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
7 # Author: Manish Sud <msud@san.rr.com>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
8 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
10 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
11 # This file is part of MayaChemTools.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
12 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
14 # the terms of the GNU Lesser General Public License as published by the Free
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
15 # Software Foundation; either version 3 of the License, or (at your option) any
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
16 # later version.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
17 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
18 # MayaChemTools is distributed in the hope that it will be useful, but without
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
19 # any warranty; without even the implied warranty of merchantability of fitness
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
20 # for a particular purpose. See the GNU Lesser General Public License for more
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
21 # details.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
22 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
23 # You should have received a copy of the GNU Lesser General Public License
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
26 # Boston, MA, 02111-1307, USA.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
27 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
28
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
29 use strict;
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
30 use Exporter;
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
31 use Carp;
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
32 use PeriodicTable qw(IsElement);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
33 use TimeUtil ();
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
34
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
35 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
36
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
37 @ISA = qw(Exporter);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
38 @EXPORT = qw(GenerateCmpdAtomLine GenerateCmpdBondLine GenerateCmpdChargePropertyLines GenerateCmpdCommentsLine GenerateCmpdCountsLine GenerateCmpdAtomAliasPropertyLines GenerateCmpdIsotopePropertyLines GenerateCmpdDataHeaderLabelsAndValuesLines GenerateCmpdMiscInfoLine GenerateCmpdRadicalPropertyLines GenerateCmpdMolNameLine GenerateEmptyCtabBlockLines GenerateMiscLineDateStamp GetAllAndCommonCmpdDataHeaderLabels GetCmpdDataHeaderLabels GetCmpdDataHeaderLabelsAndValues GetCmpdFragments GetCtabLinesCount GetUnknownAtoms GetInvalidAtomNumbers MDLChargeToInternalCharge InternalChargeToMDLCharge MDLBondTypeToInternalBondOrder InternalBondOrderToMDLBondType MDLBondStereoToInternalBondStereochemistry InternalBondStereochemistryToMDLBondStereo InternalSpinMultiplicityToMDLRadical MDLRadicalToInternalSpinMultiplicity IsCmpd3D IsCmpd2D ParseCmpdAtomLine ParseCmpdBondLine ParseCmpdCommentsLine ParseCmpdCountsLine ParseCmpdMiscInfoLine ParseCmpdMolNameLine ParseCmpdAtomAliasPropertyLine ParseCmpdChargePropertyLine ParseCmpdIsotopePropertyLine ParseCmpdRadicalPropertyLine ReadCmpdString RemoveCmpdDataHeaderLabelAndValue WashCmpd);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
39 @EXPORT_OK = qw();
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
40 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
41
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Format the counts line of a V2000 connection table (CTAB).
#
# Supported calls:
#
#   GenerateCmpdCountsLine($AtomCount, $BondCount)
#   GenerateCmpdCountsLine($AtomCount, $BondCount, $ChiralFlag)
#   GenerateCmpdCountsLine($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version)
#
# Any other number of arguments is handled like the two argument form. Values
# not supplied default to: ChiralFlag - 0; PropertyCount - 999; Version - V2000.
#
# Returns the formatted line without a trailing newline. Croaks when the atom
# count or the bond count doesn't fit into its three column field.
#
sub GenerateCmpdCountsLine {
  my($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version, $Line);

  if (@_ == 5) {
    ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version) = @_;
  }
  elsif (@_ == 3) {
    ($AtomCount, $BondCount, $ChiralFlag) = @_;
    $PropertyCount = 999;
    $Version = "V2000";
  }
  else {
    ($AtomCount, $BondCount) = @_;
    $ChiralFlag = 0;
    $PropertyCount = 999;
    $Version = "V2000";
  }
  if ($AtomCount > 999) {
    croak "Error: SDFileUtil::GenerateCmpdCountsLine: The atom count, $AtomCount, exceeds maximum of 999 allowed for CTAB version 2000. The Extended Connection Table (V3000) format in MDL MOL and SD files is not supported by the current release of MayaChemTools...";
  }
  # The bond count lives in a fixed three column field as well; a larger value
  # would push all the following fields to the right and corrupt the line.
  if ($BondCount > 999) {
    croak "Error: SDFileUtil::GenerateCmpdCountsLine: The bond count, $BondCount, exceeds maximum of 999 allowed for CTAB version 2000. The Extended Connection Table (V3000) format in MDL MOL and SD files is not supported by the current release of MayaChemTools...";
  }
  $Line = sprintf "%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i%6s", $AtomCount, $BondCount, 0, 0, $ChiralFlag, 0, 0, 0, 0, 0, $PropertyCount, $Version;

  return ($Line);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
67
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Generate comments line: the specified comments string limited to its first
# 80 characters...
sub GenerateCmpdCommentsLine {
  my($Comments) = @_;

  # Short enough already; hand it back untouched...
  if (!(length($Comments) > 80)) {
    return $Comments;
  }

  return substr($Comments, 0, 80);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
77
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Generate molname line: the specified molecule name limited to its first
# 80 characters...
sub GenerateCmpdMolNameLine {
  my($MolName) = @_;

  # Short enough already; hand it back untouched...
  if (!(length($MolName) > 80)) {
    return $MolName;
  }

  return substr($MolName, 0, 80);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
87
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Generate data for compounds misc info line...
#
# The line is column oriented: two columns for user initials, eight columns
# for program name, the date stamp and two columns for the dimensional code.
#
# Parameters (all optional; undefined or false values use the default):
#
#   ProgramName - Default: MayaChem
#   UserInitial - Default: two spaces
#   Code        - Default: 2D
#
# Returns the formatted line without a trailing newline.
#
sub GenerateCmpdMiscInfoLine {
  my($ProgramName, $UserInitial, $Code) = @_;
  my($Date, $Line);

  if (!(defined($ProgramName) && $ProgramName)) {
    $ProgramName = "MayaChem";
  }
  if (!(defined($UserInitial) && $UserInitial)) {
    $UserInitial = "  ";
  }
  if (!(defined($Code) && $Code)) {
    $Code = "2D";
  }
  $Date = GenerateMiscLineDateStamp();

  # %-N.Ns truncates values longer than N characters and pads shorter ones
  # with trailing spaces, so the date stamp always starts in the same column...
  $Line = sprintf "%-2.2s%-8.8s%s%-2.2s", $UserInitial, $ProgramName, $Date, $Code;

  return $Line;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
118
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Generate lines for an empty CTAB block...
#
# When called with exactly one argument, it is used as the date stamp for the
# misc info line; otherwise, a new date stamp is generated.
#
# Returns a single string containing these newline separated lines, without a
# trailing newline:
#
#   First line: Blank molname line...
#   Second line: Misc info...
#   Third line: Blank comments line...
#   Fourth line: Counts line reflecting empty structure data block...
#   Fifth line: M  END
#
sub GenerateEmptyCtabBlockLines {
  my($Date, $Lines);

  if (@_ == 1) {
    ($Date) = @_;
  }
  else {
    $Date = GenerateMiscLineDateStamp();
  }

  # Misc info line starts with a two column blank user initials field, and the
  # terminator is "M", two spaces, and "END"...
  $Lines = "\n";
  $Lines .= "  MayaChem${Date}2D\n";
  $Lines .= "\n";
  $Lines .= GenerateCmpdCountsLine(0, 0, 0) . "\n";
  $Lines .= "M  END";

  return $Lines;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
141
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Generate SD file date stamp for the misc info line by handing off to
# TimeUtil::SDFileTimeStamp...
sub GenerateMiscLineDateStamp {

  return TimeUtil::SDFileTimeStamp();
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
146
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Generate data for compound atom line...
#
# Parameters:
#
#   AtomSymbol, AtomX, AtomY, AtomZ - Atom symbol and coordinates
#   MassDifference, Charge, StereoParity - Optional; default to 0 when undefined
#
# The line is column oriented: three 10 column coordinates, a space, a three
# column atom symbol, a two column mass difference and three column fields for
# everything after that. The nine fields following stereo parity are always
# written as 0.
#
# Returns the formatted line without a trailing newline.
#
sub GenerateCmpdAtomLine {
  my($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity) = @_;
  my($Format, $Line);

  if (!defined $MassDifference) {
    $MassDifference = 0;
  }
  if (!defined $Charge) {
    $Charge = 0;
  }
  if (!defined $StereoParity) {
    $StereoParity = 0;
  }

  # Each of the nine trailing fields has to be three columns wide...
  $Format = "%10.4f%10.4f%10.4f %-3s%2i%3i%3i" . ("  0" x 9);
  $Line = sprintf $Format, $AtomX, $AtomY, $AtomZ, $AtomSymbol, $MassDifference, $Charge, $StereoParity;

  return $Line;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
166
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Generate data for compound bond line...
#
# Parameters:
#
#   FirstAtomNum, SecondAtomNum - Atom numbers
#   BondType - Bond type
#   BondStereo - Optional; defaults to 0 when undefined
#
# The line consists of seven fields, each three columns wide. The three fields
# following bond stereo are always written as 0.
#
# Returns the formatted line without a trailing newline.
#
sub GenerateCmpdBondLine {
  my($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo) = @_;
  my($Format, $Line);

  if (!defined $BondStereo) {
    $BondStereo = 0;
  }

  # Each of the three trailing fields has to be three columns wide...
  $Format = "%3i%3i%3i%3i" . ("  0" x 3);
  $Line = sprintf $Format, $FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo;

  return $Line;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
180
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Generate charge property lines for CTAB block by handing off the specified
# reference to _GenerateCmpdGenericPropertyLines under the name 'Charge'...
#
sub GenerateCmpdChargePropertyLines {
  my $ChargeValuePairsRef = $_[0];

  return _GenerateCmpdGenericPropertyLines('Charge', $ChargeValuePairsRef);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
188
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Generate isotope property lines for CTAB block by handing off the specified
# reference to _GenerateCmpdGenericPropertyLines under the name 'Isotope'...
#
sub GenerateCmpdIsotopePropertyLines {
  my $IsotopeValuePairsRef = $_[0];

  return _GenerateCmpdGenericPropertyLines('Isotope', $IsotopeValuePairsRef);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
196
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Generate radical property lines for CTAB block by handing off the specified
# reference to _GenerateCmpdGenericPropertyLines under the name 'Radical'...
#
sub GenerateCmpdRadicalPropertyLines {
  my $RadicalValuePairsRef = $_[0];

  return _GenerateCmpdGenericPropertyLines('Radical', $RadicalValuePairsRef);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
204
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Generate atom alias property lines for CTAB block...
#
# Atom alias property line format:
#
# A  aaa
# x...
#
#    aaa: Atom number
#    x: Atom alias in next line
#
# PropertyValuePairsRef is a reference to a flat list of atom number and atom
# alias pairs. An unpaired trailing element is ignored.
#
# Returns a list containing two lines for each pair: the "A  aaa" line
# followed by the atom alias line.
#
sub GenerateCmpdAtomAliasPropertyLines {
  my($PropertyValuePairsRef) = @_;
  my($Index, $AtomNum, $AtomAlias, $Line, @PropertyLines);

  @PropertyLines = ();

  for ($Index = 0; $Index < $#{$PropertyValuePairsRef}; $Index += 2) {
    $AtomNum = $PropertyValuePairsRef->[$Index];
    $AtomAlias = $PropertyValuePairsRef->[$Index + 1];

    # "A" is followed by two spaces and a three column atom number...
    $Line = sprintf "A  %3i", $AtomNum;

    push @PropertyLines, $Line;
    push @PropertyLines, $AtomAlias;
  }

  return @PropertyLines;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
233
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Generate data header labels and values lines...
#
# For each label in DataHeaderLabelsRef, optionally sorted when SortDataLabels
# is true, three lines are generated: data header line containing the label,
# data value line and a blank line. Labels without an entry in
# DataHeaderLabelsAndValuesRef get an empty data value.
#
sub GenerateCmpdDataHeaderLabelsAndValuesLines {
  my($DataHeaderLabelsRef, $DataHeaderLabelsAndValuesRef, $SortDataLabels) = @_;
  my(@OrderedLabels, @Lines);

  # An undefined sort flag is simply false...
  @OrderedLabels = $SortDataLabels ? sort(@{$DataHeaderLabelsRef}) : @{$DataHeaderLabelsRef};

  @Lines = ();
  for my $Label (@OrderedLabels) {
    my $Value = (exists $DataHeaderLabelsAndValuesRef->{$Label}) ? $DataHeaderLabelsAndValuesRef->{$Label} : '';
    push @Lines, "> <${Label}>";
    push @Lines, "$Value";
    push @Lines, "";
  }
  return @Lines;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
261
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Parse data field header in SD file and return lists of all and common data field
# labels.
#
# Compounds are read from SDFileRef using ReadCmpdString until it returns a
# false value.
#
# Returns a list of three values: number of compounds read, reference to a list
# of all data field labels in the order first seen, and reference to a list of
# data field labels present in all the compounds.
#
sub GetAllAndCommonCmpdDataHeaderLabels {
  my($SDFileRef) = @_;
  my($CmpdCount, $CmpdString, $Label, @CmpdLines, @CmpdDataFieldLabels, @DataFieldLabels, @CommonDataFieldLabels, %CmpdDataFieldLabelsMap, %DataFieldLabelsMap);

  $CmpdCount = 0;
  @DataFieldLabels = ();
  @CommonDataFieldLabels = ();
  %DataFieldLabelsMap = ();

  while ($CmpdString = ReadCmpdString($SDFileRef)) {
    $CmpdCount++;
    @CmpdLines = split "\n", $CmpdString;
    @CmpdDataFieldLabels = GetCmpdDataHeaderLabels(\@CmpdLines);

    # Only the labels of the first compound can start out as common. Checking
    # the compound count instead of an empty label list makes sure labels
    # showing up after compounds without any data fields are not treated as
    # being present in all the compounds...
    if ($CmpdCount == 1) {
      # Get the initial label set and set up a map...
      @DataFieldLabels = @CmpdDataFieldLabels;
      for $Label (@DataFieldLabels) {
        $DataFieldLabelsMap{$Label} = "PresentInAll";
      }
    }
    else {
      # Setup a map for the current labels...
      %CmpdDataFieldLabelsMap = ();
      for $Label (@CmpdDataFieldLabels) {
        $CmpdDataFieldLabelsMap{$Label} = "PresentInSome";
      }
      # Old labels missing from this compound are no longer common...
      for $Label (@DataFieldLabels) {
        if (!$CmpdDataFieldLabelsMap{$Label}) {
          $DataFieldLabelsMap{$Label} = "PresentInSome";
        }
      }
      # Labels of this compound not seen before are new; add 'em...
      for $Label (@CmpdDataFieldLabels) {
        if (!$DataFieldLabelsMap{$Label}) {
          push @DataFieldLabels, $Label;
          $DataFieldLabelsMap{$Label} = "PresentInSome";
        }
      }
    }
  }
  # Identify the common data field labels...
  @CommonDataFieldLabels = ();
  for $Label (@DataFieldLabels) {
    if ($DataFieldLabelsMap{$Label} eq "PresentInAll") {
      push @CommonDataFieldLabels, $Label;
    }
  }
  return ($CmpdCount, \@DataFieldLabels, \@CommonDataFieldLabels);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
317
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
318 # Parse all the data header labels and return them as a list...
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
319 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
320 # Format:
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
321 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
322 #> Data header line
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
323 #Data line(s)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
324 #Blank line
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
325 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
326 # [Data Header] (one line) precedes each item of data, starts with a greater than (>) sign, and
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
327 # contains at least one of the following:
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
328 # The field name enclosed in angle brackets. For example: <melting.point>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
329 # The field number, DTn , where n represents the number assigned to the field in a MACCS-II database
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
330 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
331 #Optional information for the data header includes:
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
332 # The compound’s external and internal registry numbers. External registry numbers must be enclosed in parentheses.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
333 # Any combination of information
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
334 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
335 #The following are examples of valid data headers:
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
336 #> <MELTING.POINT>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
337 #> 55 (MD-08974) <BOILING.POINT> DT12
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
338 #> DT12 55
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
339 #> (MD-0894) <BOILING.POINT> FROM ARCHIVES
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
340 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
341 #Notes: Sometimes last blank line is missing and can be just followed by $$$$
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
342 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
sub GetCmpdDataHeaderLabels {
  my($CmpdLines) = @_;
  my(@Labels);

  # Collect the first angle bracketed field name from each line starting with
  # '>'; header lines without any field name are skipped...
  @Labels = ();
  for my $Line (@$CmpdLines) {
    next unless $Line =~ /^>/;
    next unless $Line =~ /(<.*?>)/;

    my $Name = $1;
    $Name =~ tr/<>//d;
    push @Labels, $Name;
  }
  return (@Labels);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
362
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Parse all the data header labels and values
#
# $CmpdLines is a reference to an array of compound lines. Returns a hash, as
# a flat list, mapping each data field label to its value. Values spanning
# multiple lines are joined by "\n"; a label without any value line maps to ''.
#
# A field starts at a line beginning with ">" which contains <label>. It ends
# at the first empty line, at the next header line containing <label>, or at
# $$$$ which also ends the parsing. Lines outside of a field are ignored, and
# so is the data following a header with an empty label (<>).
#
sub GetCmpdDataHeaderLabelsAndValues {
  my($CmpdLines) = @_;
  my($CmpdLine, $CurrentLabel, $Label, $ValueCount, $ProcessingLabelData, %DataFields);

  %DataFields = ();
  $CurrentLabel = '';
  $ProcessingLabelData = 0;
  $ValueCount = 0;
  CMPDLINE: for $CmpdLine (@$CmpdLines) {
    if ($CmpdLine =~ /^\$\$\$\$/) {
      last CMPDLINE;
    }
    if ($CmpdLine =~ /^>/) {
      # Does the line contains field name enclosed in angular brackets?
      ($Label) = $CmpdLine =~ /<.*?>/g;
      if (defined $Label) {
        $CurrentLabel = $Label;
        $CurrentLabel =~ s/(<|>)//g;
        $ValueCount = 0;

        # Check the length and not the truth of the label: a field named "0" is
        # a valid label and must not be dropped. Only empty label is ignored...
        $ProcessingLabelData = length($CurrentLabel) ? 1 : 0;
        if ($ProcessingLabelData) {
          $DataFields{$CurrentLabel} = '';
        }
        next CMPDLINE;
      }
      # A ">" line containing no <label>: outside of a field it is ignored by the
      # check below; inside a field it is handled like any other value line...
    }
    if (!$ProcessingLabelData) {
      next CMPDLINE;
    }
    if (!(defined($CmpdLine) && length($CmpdLine))) {
      # Blank line terminates value for a label...
      $CurrentLabel = '';
      $ValueCount = 0;
      $ProcessingLabelData = 0;
      next CMPDLINE;
    }
    $ValueCount++;

    if ($ValueCount > 1) {
      $DataFields{$CurrentLabel} .= "\n" . $CmpdLine;
    }
    else {
      $DataFields{$CurrentLabel} = $CmpdLine;
    }
  }
  return (%DataFields);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
419
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Return an updated compound string after removing data header label along with its
# value from the specified compound string...
#
# $CmpdString holds the compound lines joined by "\n" and $DataHeaderLabel is
# the field name without angle brackets. The label is matched literally and
# case insensitively inside <...> on lines starting with ">". The header line
# and all lines following it up to and including the first empty line are
# dropped; a $$$$ line reached before an empty line is kept.
#
sub RemoveCmpdDataHeaderLabelAndValue {
  my($CmpdString, $DataHeaderLabel) = @_;
  my($Line, $ProcessingDataHeaderLabel, @CmpdLines);

  @CmpdLines = ();
  $ProcessingDataHeaderLabel = 0;

  CMPDLINE: for $Line (split "\n", $CmpdString) {
    # Quote the label so that regex metacharacters in a field name, such as
    # "." or "(", are matched as plain characters...
    if ($Line =~ /^>/ && $Line =~ /<\Q$DataHeaderLabel\E>/i) {
      $ProcessingDataHeaderLabel = 1;
      next CMPDLINE;
    }

    if ($ProcessingDataHeaderLabel) {
      # Blank line indicates end of data value; $$$$ ends it as well, but has
      # to stay in the compound string...
      if ($Line =~ /^\$\$\$\$/) {
        push @CmpdLines, $Line;
        $ProcessingDataHeaderLabel = 0;
      }
      elsif (!length($Line)) {
        $ProcessingDataHeaderLabel = 0;
      }
      next CMPDLINE;
    }

    # Track compound lines not belonging to the removed data field...
    push @CmpdLines, $Line;
  }

  return join "\n", @CmpdLines;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
454
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
#
# Using bond blocks, figure out the number of disconnected fragments and
# return their values along with the atom numbers in a string delimited by new
# line character.
#
# $CmpdLines is a reference to an array of compound lines; counts line is read
# from index 3 and bond lines follow the atom lines. Returns a list of two
# values: fragment count and fragment string. The fragment string contains one
# line per fragment, each line listing the atom numbers of a fragment in
# ascending order delimited by a space. Fragments are listed by decreasing
# size; fragments of equal size are listed by increasing fragment number, that
# is in the order they were found while scanning atoms 1 .. atom count.
#
sub GetCmpdFragments {
  my($CmpdLines) = @_;
  my($AtomCount, $BondCount, $FirstAtomNum, $SecondAtomNum, $BondType, $LineIndex, $AtomNum, $NbrAtomNum, $ProcessAtomNum, $ProcessedAtomCount, $FragmentNum, $FragmentCount, $FragmentString, @AtomConnections, @ProcessedAtoms, @ProcessingAtoms, @ConnectedAtoms, @FragmentStrings, %Fragments);

  # Setup the connection table for each atom...
  @AtomConnections = ();
  ($AtomCount, $BondCount) = ParseCmpdCountsLine($CmpdLines->[3]);
  for $AtomNum (1 .. $AtomCount) {
    %{$AtomConnections[$AtomNum]} = ();
  }
  for ($LineIndex = 4 + $AtomCount; $LineIndex < (4 + $AtomCount + $BondCount); $LineIndex++) {
    ($FirstAtomNum, $SecondAtomNum, $BondType) = ParseCmpdBondLine($CmpdLines->[$LineIndex]);
    # Record the bond in both directions without overwriting an earlier entry...
    if (!$AtomConnections[$FirstAtomNum]{$SecondAtomNum}) {
      $AtomConnections[$FirstAtomNum]{$SecondAtomNum} = $BondType;
    }
    if (!$AtomConnections[$SecondAtomNum]{$FirstAtomNum}) {
      $AtomConnections[$SecondAtomNum]{$FirstAtomNum} = $BondType;
    }
  }

  # Get set to count fragments...
  $ProcessedAtomCount = 0;
  $FragmentNum = 0;
  %Fragments = ();
  @ProcessedAtoms = ();
  for $AtomNum (1 .. $AtomCount) {
    $ProcessedAtoms[$AtomNum] = 0;
  }
  while ($ProcessedAtomCount < $AtomCount) {
    @ProcessingAtoms = ();
    @ConnectedAtoms = ();

    # Start a new fragment at the first atom not assigned to any fragment...
    ATOMNUM: for $AtomNum (1 .. $AtomCount) {
      if (!$ProcessedAtoms[$AtomNum]) {
        $ProcessedAtomCount++;
        $ProcessedAtoms[$AtomNum] = 1;
        push @ProcessingAtoms, $AtomNum;
        $FragmentNum++;
        @{$Fragments{$FragmentNum}} = ($AtomNum);
        last ATOMNUM;
      }
    }

    # Go over the neighbors and follow the connection trail while collecting the
    # atoms numbers present in the connected fragment...
    while (@ProcessingAtoms) {
      for $ProcessAtomNum (@ProcessingAtoms) {
        for $NbrAtomNum (keys %{$AtomConnections[$ProcessAtomNum]}) {
          if (!$ProcessedAtoms[$NbrAtomNum]) {
            $ProcessedAtomCount++;
            $ProcessedAtoms[$NbrAtomNum] = 1;
            push @ConnectedAtoms, $NbrAtomNum;
            push @{$Fragments{$FragmentNum}}, $NbrAtomNum;
          }
        }
      }
      # Atoms reached in this pass are the ones to expand in the next pass...
      @ProcessingAtoms = @ConnectedAtoms;
      @ConnectedAtoms = ();
    }
  }
  $FragmentCount = $FragmentNum;

  # Sort out the fragments by size; break ties by fragment number to keep the
  # output independent of the hash key order...
  @FragmentStrings = ();
  for $FragmentNum (sort { @{$Fragments{$b}} <=> @{$Fragments{$a}} || $a <=> $b } keys %Fragments) {
    # Sort the atoms in a fragment by their numbers...
    push @FragmentStrings, join(" ", sort { $a <=> $b } @{$Fragments{$FragmentNum}});
  }
  $FragmentString = join "\n", @FragmentStrings;

  return ($FragmentCount, $FragmentString);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
538
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Count the Ctab atom/bond lines: the lines from index 4 up to, but excluding,
# the first line which starts with anything other than a space or a digit.
# Returns 0 when no such terminating line is present.
sub GetCtabLinesCount {
  my($CmpdLines) = @_;
  my $FirstCtabIndex = 4;
  my $Index = $FirstCtabIndex;

  while ($Index < @$CmpdLines) {
    # Atom and bond lines start with a space or a digit; anything else, such
    # as a property line, ends the block...
    if ($CmpdLines->[$Index] !~ /^[0-9 ]/) {
      return $Index - $FirstCtabIndex;
    }
    $Index++;
  }
  return 0;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
557
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Go over the atom block and report the atoms whose symbol is not accepted by
# IsElement. Returns a list of three values: number of such atoms, their
# symbols (each one preceded by a space), and the corresponding atom lines
# delimited by new line character.
sub GetUnknownAtoms {
  my($CmpdLines) = @_;
  my($AtomCount) = ParseCmpdCountsLine($CmpdLines->[3]);
  my($UnknownCount, $UnknownSymbols, $UnknownLines) = (0, "", "");

  # Atom lines follow the counts line present at index 3...
  for (my $Index = 4; $Index < (4 + $AtomCount); $Index++) {
    my($Symbol) = ParseCmpdAtomLine($CmpdLines->[$Index]);
    next if IsElement($Symbol);

    $UnknownCount++;
    $UnknownSymbols .= " $Symbol";
    $UnknownLines = $UnknownLines ? $UnknownLines . "\n" . $CmpdLines->[$Index] : $CmpdLines->[$Index];
  }
  return ($UnknownCount, $UnknownSymbols, $UnknownLines);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
583
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# A compound is considered three dimensional as soon as one of its atom lines
# carries a non-zero z coordinate. Returns 1 in that case and 0 otherwise.
#
sub IsCmpd3D {
  my($CmpdLines) = @_;
  my($AtomCount) = ParseCmpdCountsLine($CmpdLines->[3]);

  for (my $Index = 4; $Index < (4 + $AtomCount); $Index++) {
    # Only the fourth value returned for an atom line, its z coordinate, matters...
    my(undef, undef, undef, $ZCoord) = ParseCmpdAtomLine($CmpdLines->[$Index]);
    return 1 if $ZCoord != 0;
  }
  return 0;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
600
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# A compound is 2D exactly when IsCmpd3D does not report it as 3D. Returns 1
# for 2D and 0 otherwise.
#
sub IsCmpd2D {
  my($CmpdLines) = @_;

  if (IsCmpd3D($CmpdLines)) {
    return 0;
  }
  return 1;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
608
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Using bond blocks, count the number of bond lines which contain atom numbers
# greater than atom count specified in compound count line...
#
# Charge, radical, isotope and atom alias property lines found before M END are
# checked the same way. Returns a list of three values: number of invalid atom
# numbers, the invalid atom numbers (each one preceded by a space), and the
# lines containing them delimited by new line character.
#
sub GetInvalidAtomNumbers {
  my($CmpdLines) = @_;
  my($LineIndex, $AtomCount, $BondCount, $FirstAtomNum, $SecondAtomNum, $InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines, $Line, $InvalidAtomPropertyLine, $ValuePairIndex, $AtomNum, @ValuePairs);

  ($AtomCount, $BondCount) = ParseCmpdCountsLine($CmpdLines->[3]);

  $InvalidAtomNumbersCount = 0;
  $InvalidAtomNumbers = "";
  $InvalidAtomNumberLines = "";

  # Go over bond block lines...
  LINE: for ($LineIndex = 4 + $AtomCount; $LineIndex < (4 + $AtomCount + $BondCount); $LineIndex++) {
    ($FirstAtomNum, $SecondAtomNum) = ParseCmpdBondLine($CmpdLines->[$LineIndex]);
    if ($FirstAtomNum <= $AtomCount && $SecondAtomNum <= $AtomCount) {
      next LINE;
    }
    if ($FirstAtomNum > $AtomCount) {
      $InvalidAtomNumbersCount++;
      $InvalidAtomNumbers .= " $FirstAtomNum";
    }
    if ($SecondAtomNum > $AtomCount) {
      $InvalidAtomNumbersCount++;
      $InvalidAtomNumbers .= " $SecondAtomNum";
    }
    if ($InvalidAtomNumberLines) {
      $InvalidAtomNumberLines .= "\n" . $CmpdLines->[$LineIndex];
    }
    else {
      $InvalidAtomNumberLines = $CmpdLines->[$LineIndex];
    }
  }

  # Go over property lines before M END...
  #
  # The line type is separated from the leading "M" or "A" by whitespace; match
  # any amount of it instead of relying on an exact number of spaces...
  #
  LINE: for ($LineIndex = (4 + $AtomCount + $BondCount); $LineIndex < @$CmpdLines; $LineIndex++) {
    $Line = $CmpdLines->[$LineIndex];
    if ($Line =~ /^M\s+END/i) {
      last LINE;
    }
    @ValuePairs = ();
    if ($Line =~ /^M\s+CHG/i) {
      @ValuePairs = ParseCmpdChargePropertyLine($Line);
    }
    elsif ($Line =~ /^M\s+RAD/i) {
      @ValuePairs = ParseCmpdRadicalPropertyLine($Line);
    }
    elsif ($Line =~ /^M\s+ISO/i) {
      @ValuePairs = ParseCmpdIsotopePropertyLine($Line);
    }
    elsif ($Line =~ /^A\s+/i) {
      # Atom alias takes up two lines; consume the following line as well...
      my($NextLine);
      $LineIndex++;
      $NextLine = $CmpdLines->[$LineIndex];
      @ValuePairs = ParseCmpdAtomAliasPropertyLine($Line, $NextLine);
    }
    else {
      next LINE;
    }

    # Values come as atom number and property value pairs; only atom numbers
    # at the even positions are checked...
    $InvalidAtomPropertyLine = 0;
    for ($ValuePairIndex = 0; $ValuePairIndex < $#ValuePairs; $ValuePairIndex += 2) {
      $AtomNum = $ValuePairs[$ValuePairIndex];
      if ($AtomNum > $AtomCount) {
        $InvalidAtomPropertyLine = 1;
        $InvalidAtomNumbersCount++;
        $InvalidAtomNumbers .= " $AtomNum";
      }
    }
    if ($InvalidAtomPropertyLine) {
      if ($InvalidAtomNumberLines) {
        $InvalidAtomNumberLines .= "\n" . $Line;
      }
      else {
        $InvalidAtomNumberLines = $Line;
      }
    }
  }

  return ($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
692
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
693 # Ctab lines: Atom block
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
694 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
695 # Format: xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz aaaddcccssshhhbbbvvvHHHrrriiimmmnnneee
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
696 # A10 A10 A10 xA3 A2A3 A3 A3 A3 A3 A3 A3 A3 A3 A3 A3
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
697 # x,y,z: Atom coordinates
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
698 # aaa: Atom symbol. Entry in periodic table or L for atom list, A, Q, * for unspecified
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
699 # atom, and LP for lone pair, or R# for Rgroup label
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
700 # dd: Mass difference. -3, -2, -1, 0, 1, 2, 3, 4 (0 for value beyond these limits)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
701 # ccc: Charge. 0 = uncharged or value other than these, 1 = +3, 2 = +2, 3 = +1,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
702 # 4 = doublet radical, 5 = -1, 6 = -2, 7 = -3
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
703 # sss: Atom stereo parity. 0 = not stereo, 1 = odd, 2 = even, 3 = either or unmarked stereo center
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
704 # hhh: Hydrogen count + 1. 1 = H0, 2 = H1, 3 = H2, 4 = H3, 5 = H4
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
705 # bbb: Stereo care box. 0 = ignore stereo configuration of this double bond atom, 1 = stereo
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
706 # configuration of double bond atom must match
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
707 # vvv: Valence. 0 = no marking (default), (1 to 14) = (1 to 14), 15 = zero valence
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
708 # HHH: H0 designator. 0 = not specified, 1 = no H atoms allowed (redundant due to hhh)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
709 # rrr: Not used
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
710 # iii: Not used
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
711 # mmm: Atom-atom mapping number. 1 - number of atoms
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
712 # nnn: Inversion/retention flag. 0 = property not applied, 1 = configuration is inverted,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
713 # 2 = configuration is retained.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
714 # eee: Exact change flag. 0 = property not applied, 1 = change on atom must be
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
715 # exactly as shown
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
716 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
717 # Notes:
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
718 # . StereoParity: 1 - ClockwiseStereo, 2 - AntiClockwiseStereo; 3 - Either; 0 - none. These
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
719 # values determine chirality around the chiral center; a non zero value indicates atom
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
720 # has been marked as chiral center.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
721 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdAtomLine {
  # Split one fixed-width Ctab atom block line into its leading fields.
  #
  # Parameters:
  #   $Line - Text of the atom line.
  #
  # Returns:
  #   ($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity)
  #
  # Any field the line is too short to contain is returned as an empty string.
  # The "A" unpack template strips trailing whitespace only, so values keep any
  # leading blanks present in the fixed-width columns.
  #
  my($Line) = @_;
  my($AtomX, $AtomY, $AtomZ, $AtomSymbol, $MassDifference, $Charge, $StereoParity) = ('') x 7;

  if (length($Line) > 31) {
    ($AtomX, $AtomY, $AtomZ, $AtomSymbol, $MassDifference, $Charge, $StereoParity) = unpack("A10A10A10xA3A2A3A3", $Line);
  }
  else {
    # The line ends before the atom symbol column. Read the coordinates only, so
    # that the atom symbol keeps its empty string default instead of being reset
    # to undef by a list assignment with too few values.
    ($AtomX, $AtomY, $AtomZ) = unpack("A10A10A10", $Line);
  }
  return ($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
735
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
736 # Map MDL charge value used in SD and MOL files to internal charge used by MayaChemTools.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
737 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
sub MDLChargeToInternalCharge {
  # Translate an MDL atom block charge code into the signed charge used internally.
  #
  # Parameters:
  #   $MDLCharge - Charge code; compared numerically.
  #
  # Returns:
  #   Internal charge value. Code 4 (doublet radical) and any unrecognized code
  #   give 0; unrecognized codes additionally trigger a warning.
  #
  my($MDLCharge) = @_;

  # MDL code and internal charge pairs, tested in this order.
  my @ChargePairs = ([0, 0], [1, 3], [2, 2], [3, 1], [5, -1], [6, -2], [7, -3]);

  for my $ChargePair (@ChargePairs) {
    my($MDLValue, $InternalValue) = @{$ChargePair};
    return $InternalValue if ($MDLCharge == $MDLValue);
  }

  # Nothing matched: only the doublet radical code is accepted silently.
  if ($MDLCharge != 4) {
    carp "Warning: MDLChargeToInternalCharge: MDL charge value, $MDLCharge, is not supported: An internal charge value, 0, has been assigned...";
  }
  return 0;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
759
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
760 # Map internal charge used by MayaChemTools to MDL charge value used in SD and MOL files.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
761 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
sub InternalChargeToMDLCharge {
  # Translate a signed internal charge into the MDL atom block charge code.
  #
  # Parameters:
  #   $InternalCharge - Charge value; compared numerically.
  #
  # Returns:
  #   MDL charge code: 1, 2, 3 for +3, +2, +1 and 5, 6, 7 for -1, -2, -3.
  #   Every other internal charge value, including 0, gives MDL code 0.
  #
  my($InternalCharge) = @_;

  return 1 if ($InternalCharge == 3);
  return 2 if ($InternalCharge == 2);
  return 3 if ($InternalCharge == 1);
  return 5 if ($InternalCharge == -1);
  return 6 if ($InternalCharge == -2);
  return 7 if ($InternalCharge == -3);

  return 0;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
779
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
780 # Ctab lines: Bond block
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
781 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
782 # Format: 111222tttsssxxxrrrccc
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
783 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
784 # 111: First atom number.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
785 # 222: Second atom number.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
786 # ttt: Bond type. 1 = Single, 2 = Double, 3 = Triple, 4 = Aromatic, 5 = Single or Double,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
787 # 6 = Single or Aromatic, 7 = Double or Aromatic, 8 = Any
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
788 # sss: Bond stereo. Single bonds: 0 = not stereo, 1 = Up, 4 = Either, 6 = Down,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
789 # Double bonds: 0 = Use x-, y-, z-coords from atom block to determine cis or trans,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
790 # 3 = Cis or trans (either) double bond
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
791 # xxx: Not used
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
792 # rrr: Bond topology. 0 = Either, 1 = Ring, 2 = Chain
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
793 # ccc: Reacting center status. 0 = unmarked, 1 = a center, -1 = not a center,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
794 # Additional: 2 = no change,4 = bond made/broken, 8 = bond order changes 12 = 4+8
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
795 # (both made/broken and changes); 5 = (4 + 1), 9 = (8 + 1), and 13 = (12 + 1) are also possible
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
796 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdBondLine {
  # Read the first four 3-character columns of a Ctab bond block line.
  #
  # Parameters:
  #   $Line - Text of the bond line.
  #
  # Returns:
  #   ($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo) with every blank
  #   character removed from each value.
  #
  my($Line) = @_;

  my @BondFields = unpack("A3A3A3A3", $Line);
  foreach my $BondField (@BondFields) {
    $BondField =~ tr/ //d;
  }

  # A slice keeps the result at exactly four values.
  return @BondFields[0 .. 3];
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
804
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
805 # Map MDL bond type value used in SD and MOL files to internal bond order and bond types
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
806 # values used by MayaChemTools...
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
807 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
sub MDLBondTypeToInternalBondOrder {
  # Translate an MDL bond type code into an internal bond order and bond type name.
  #
  # Parameters:
  #   $MDLBondType - Bond type code; compared numerically.
  #
  # Returns:
  #   ($InternalBondOrder, $InternalBondType)
  #
  # Codes 1 to 3 are plain single, double and triple bonds. Code 4 is given the
  # internal order 1.5 to mark an aromatic bond. Codes 5 to 8 are query bond types;
  # they keep a descriptive type name and receive the bond order shown below.
  # Any other code is reported with a warning and treated as a single bond.
  #
  my($MDLBondType) = @_;
  my($InternalBondOrder, $InternalBondType);

  BONDTYPE: {
    if ($MDLBondType == 1) { $InternalBondOrder = 1; $InternalBondType = 'Single'; last BONDTYPE;}
    if ($MDLBondType == 2) { $InternalBondOrder = 2; $InternalBondType = 'Double'; last BONDTYPE;}
    if ($MDLBondType == 3) { $InternalBondOrder = 3; $InternalBondType = 'Triple'; last BONDTYPE;}
    if ($MDLBondType == 4) { $InternalBondOrder = 1.5; $InternalBondType = 'Aromatic'; last BONDTYPE;} # Aromatic
    if ($MDLBondType == 5) { $InternalBondOrder = 1; $InternalBondType = 'SingleOrDouble'; last BONDTYPE;} # Query
    if ($MDLBondType == 6) { $InternalBondOrder = 1; $InternalBondType = 'SingleOrAromatic'; last BONDTYPE;} # Query
    if ($MDLBondType == 7) { $InternalBondOrder = 2; $InternalBondType = 'DoubleOrAromatic'; last BONDTYPE;} # Query
    if ($MDLBondType == 8) { $InternalBondOrder = 1; $InternalBondType = 'Any'; last BONDTYPE;} # Query

    # Unsupported code: fall back to a single bond and say so. The message reports
    # the bond order that is actually assigned.
    $InternalBondOrder = 1;
    $InternalBondType = 'Single';

    carp "Warning: MDLBondTypeToInternalBondOrder: MDL bond type value, $MDLBondType, is not supported: An internal bond order value, 1, has been assigned...";
  }
  return ($InternalBondOrder, $InternalBondType);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
838
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
839 # Map internal bond order and bond type values used by MayaChemTools to MDL bond type value used
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
840 # in SD and MOL files...
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
841 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
sub InternalBondOrderToMDLBondType {
  # Translate an internal bond order and bond type name into an MDL bond type code.
  #
  # Parameters:
  #   $InternalBondOrder - Bond order; compared numerically.
  #   $InternalBondType  - Bond type name; matched case-insensitively. A missing
  #                        value is treated as an empty name.
  #
  # Returns:
  #   MDL bond type code. Order 1 gives 5, 6 or 8 for the SingleOrDouble,
  #   SingleOrAromatic and Any type names and 1 otherwise; order 2 gives 7 for
  #   DoubleOrAromatic and 2 otherwise; order 3 gives 3; order 1.5 gives 4. Any
  #   other combination gives 8 for the Any type name, or 1 together with a warning.
  #
  my($InternalBondOrder, $InternalBondType) = @_;
  my($MDLBondType);

  $InternalBondType = '' unless defined $InternalBondType;

  BONDTYPE: {
    if ($InternalBondOrder == 1) {
      # The type name decides between a plain single bond and the query bond types
      # sharing bond order 1; the selected code must not be overwritten afterwards.
      if ($InternalBondType =~ /^SingleOrDouble$/i) {
        $MDLBondType = 5;
      }
      elsif ($InternalBondType =~ /^SingleOrAromatic$/i) {
        $MDLBondType = 6;
      }
      elsif ($InternalBondType =~ /^Any$/i) {
        $MDLBondType = 8;
      }
      else {
        $MDLBondType = 1;
      }
      last BONDTYPE;
    }
    if ($InternalBondOrder == 2) {
      if ($InternalBondType =~ /^DoubleOrAromatic$/i) {
        $MDLBondType = 7;
      }
      else {
        $MDLBondType = 2;
      }
      last BONDTYPE;
    }
    if ($InternalBondOrder == 3) { $MDLBondType = 3; last BONDTYPE;}
    if ($InternalBondOrder == 1.5) { $MDLBondType = 4; last BONDTYPE;}
    if ($InternalBondType =~ /^Any$/i) { $MDLBondType = 8; last BONDTYPE;}

    # No valid MDL bond type corresponds to the supplied values.
    $MDLBondType = 1;

    carp "Warning: InternalBondOrderToMDLBondType: Internal bond order and type values, $InternalBondOrder and $InternalBondType, don't match any valid MDL bond type: MDL bond type value, 1, has been assigned...";
  }
  return $MDLBondType;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
882
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
883 # Third line: Comments - A blank line is also allowed.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdCommentsLine {
  # Extract the comments text from the comments line of a compound record.
  #
  # Parameters:
  #   $Line - Text of the comments line; may be blank.
  #
  # Returns:
  #   At most the first 80 characters of the line, with the trailing whitespace
  #   that the "A" unpack template strips removed.
  #
  my $CommentsLine = shift;

  return scalar(unpack("A80", $CommentsLine));
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
892
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
893 # Map MDL bond stereo value used in SD and MOL files to internal bond stereochemistry values used by MayaChemTools...
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
894 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
sub MDLBondStereoToInternalBondStereochemistry {
  # Translate an MDL bond stereo code into an internal bond stereochemistry name.
  #
  # Parameters:
  #   $MDLBondStereo - Bond stereo code; compared numerically.
  #
  # Returns:
  #   'Up' for 1, 'UpOrDown' for 4, 'Down' for 6, 'CisOrTrans' for 3 and 'None'
  #   for 0. Any other code gives an empty string together with a warning.
  #
  my($MDLBondStereo) = @_;
  my($InternalBondStereo);

  $InternalBondStereo = '';

  BONDSTEREO: {
    if ($MDLBondStereo == 1) { $InternalBondStereo = 'Up'; last BONDSTEREO;}
    if ($MDLBondStereo == 4) { $InternalBondStereo = 'UpOrDown'; last BONDSTEREO;}
    if ($MDLBondStereo == 6) { $InternalBondStereo = 'Down'; last BONDSTEREO;}
    if ($MDLBondStereo == 3) { $InternalBondStereo = 'CisOrTrans'; last BONDSTEREO;}
    if ($MDLBondStereo == 0) { $InternalBondStereo = 'None'; last BONDSTEREO;}

    # Unsupported code: leave the stereochemistry empty. The warning carries the
    # name of this function so that it can be traced back to its source.
    $InternalBondStereo = '';
    carp "Warning: MDLBondStereoToInternalBondStereochemistry: MDL bond stereo value, $MDLBondStereo, is not supported: It has been ignored and bond order would be used to determine bond type...";
  }
  return $InternalBondStereo;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
913
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
914 # Map internal bond stereochemistry values used by MayaChemTools to MDL bond stereo value used in SD and MOL files...
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
915 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
sub InternalBondStereochemistryToMDLBondStereo {
  # Translate an internal bond stereochemistry name into an MDL bond stereo code.
  #
  # Parameters:
  #   $InternalBondStereo - Stereochemistry name; every name is matched
  #                         case-insensitively. A missing value is treated as an
  #                         empty name.
  #
  # Returns:
  #   1 for Up, 4 for UpOrDown, 6 for Down, 3 for CisOrTrans and 0 for anything else.
  #
  my($InternalBondStereo) = @_;
  my($MDLBondStereo);

  $InternalBondStereo = '' unless defined $InternalBondStereo;

  $MDLBondStereo = 0;

  BONDSTEREO: {
    if ($InternalBondStereo =~ /^Up$/i) { $MDLBondStereo = 1; last BONDSTEREO;}
    if ($InternalBondStereo =~ /^UpOrDown$/i) { $MDLBondStereo = 4; last BONDSTEREO;}
    if ($InternalBondStereo =~ /^Down$/i) { $MDLBondStereo = 6; last BONDSTEREO;}
    if ($InternalBondStereo =~ /^CisOrTrans$/i) { $MDLBondStereo = 3; last BONDSTEREO;}

    $MDLBondStereo = 0;
  }
  return $MDLBondStereo;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
932
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
933 # Fourth line: Counts
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
934 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
935 # Format: aaabbblllfffcccsssxxxrrrpppiiimmmvvvvvv
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
936 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
937 # aaa: number of atoms; bbb: number of bonds; lll: number of atom lists; fff: (obsolete)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
938 # ccc: chiral flag: 0=not chiral, 1=chiral; sss: number of stext entries; xxx,rrr,ppp,iii:
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
939 # (obsolete); mmm: number of lines of additional properties, including the M END line, No
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
940 # longer supported, default is set to 999; vvvvvv: version
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
941
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdCountsLine {
  # Read the counts line of a compound record.
  #
  # Parameters:
  #   $Line - Text of the counts line.
  #
  # Returns:
  #   ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version)
  #
  # A line of 39 or more characters supplies all five values. A line of 15 to 38
  # characters supplies the two counts and the chiral flag; a shorter line
  # supplies the two counts only. Values not read from the line keep the defaults
  # set below. Dies when the version value matches V3000.
  #
  my($Line) = @_;

  my($AtomCount, $BondCount);
  my($ChiralFlag, $PropertyCount, $Version) = ("0", "999", "v2000");

  my $LineLength = length($Line);
  if ($LineLength >= 39) {
    ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version) = unpack("A3A3x3x3A3x3x3x3x3x3A3A6", $Line);
  }
  elsif ($LineLength >= 15) {
    ($AtomCount, $BondCount, $ChiralFlag) = unpack("A3A3x3x3A3", $Line);
  }
  else {
    ($AtomCount, $BondCount) = unpack("A3A3", $Line);
  }

  # The extended connection table format is rejected here rather than left to callers.
  croak "Error: SDFileUtil::ParseCmpdCountsLine: The Extended Connection Table (V3000) format in MDL MOL and SD files is not supported by the current release of MayaChemTools..." if ($Version =~ /V3000/i);

  return ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
967
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
968 # Second line: Misc info
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
969 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
970 # Format: IIPPPPPPPPMMDDYYHHmmddSSssssssssssEEEEEEEEEEEERRRRRR
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
971 # A2A8 A10 A2I2A10 A12 A6
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
972 # User's first and last initials (I), program name (P), date/time (M/D/Y,H:m),
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
973 # dimensional codes - 2D or 3D (d),scaling factors (S, s), energy (E) if modeling program input,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
974 # internal registry number (R) if input through MDL form. A blank line is also allowed.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdMiscInfoLine {
  # Split the fixed-width misc info line of a compound record into its fields.
  #
  # Parameters:
  #   $Line - Text of the misc info line; may be blank.
  #
  # Returns:
  #   ($UserInitial, $ProgramName, $Date, $Code, $ScalingFactor1, $ScalingFactor2,
  #    $Energy, $RegistryNum), read with field widths 2, 8, 10, 2, 2, 10, 12 and 6.
  #
  my($Line) = @_;

  my @MiscInfoFields = unpack("A2A8A10A2A2A10A12A6", $Line);

  # A slice keeps the result at exactly eight values.
  return @MiscInfoFields[0 .. 7];
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
982
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
983 # First line: Molecule name. This line is unformatted, but like all other lines in a
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
984 # molfile may not extend beyond column 80. A blank line is also allowed.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdMolNameLine {
  # Extract the molecule name from the first line of a compound record.
  #
  # Parameters:
  #   $Line - Text of the molecule name line; may be blank.
  #
  # Returns:
  #   At most the first 80 characters of the line, with the trailing whitespace
  #   that the "A" unpack template strips removed.
  #
  my $MolNameLine = shift;

  return scalar(unpack("A80", $MolNameLine));
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
993
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
994 # Parse atom alias property line in CTAB generic properties block.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
995 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
996 # Atom alias property line format:
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
997 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
998 # A aaa
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
999 # x...
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1000 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1001 # aaa: Atom number
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1002 # x: Atom alias in next line
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1003 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdAtomAliasPropertyLine {
  # Read an atom alias property from its two lines.
  #
  # Parameters:
  #   $Line     - Atom alias property line; its second whitespace separated
  #               token is taken as the atom number.
  #   $NextLine - Line following the property line; used as the atom alias.
  #
  # Returns:
  #   ($AtomNumber, $AtomAlias)
  #
  # A warning is issued when the alias is undefined or empty. A literal "0" alias
  # is a value and does not trigger the warning.
  #
  my($Line, $NextLine) = @_;
  my($AtomNumber, $AtomAlias);

  # The leading label token is not needed.
  (undef, $AtomNumber) = split(' ', $Line);
  $AtomAlias = $NextLine;

  if (!defined($AtomAlias) || $AtomAlias eq '') {
    carp "Warning: ParseCmpdAtomAliasPropertyLine: No atom alias value specified on the line following atom alias property line...";
  }

  return ($AtomNumber, $AtomAlias);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1017
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Parse a charge property line from the CTAB generic properties block.
#
# Charge property line format:
#
# M CHGnn8 aaa vvv ...
#
# nn8: Number of value pairs. Maximum of 8 pairs allowed.
# aaa: Atom number
# vvv: -15 to +15. Default of 0 = uncharged atom. When present, this property supersedes
# all charge and radical values in the atom block, forcing a 0 charge on all atoms not
# listed in an M CHG or M RAD line.
#
# The line is handed to _ParseCmpdGenericPropertyLine and whatever it returns is
# passed back to the caller unchanged.
#
sub ParseCmpdChargePropertyLine {
  my $PropertyLine = shift;

  return _ParseCmpdGenericPropertyLine(Charge => $PropertyLine);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1035
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1036
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Parse an isotope property line from the CTAB generic properties block.
#
# Isotope property line format:
#
# M ISOnn8 aaa vvv ...
#
# nn8: Number of value pairs. Maximum of 8 pairs allowed.
# aaa: Atom number
# vvv: Absolute mass of the atom isotope as a positive integer. When present, this property
# supersedes all isotope values in the atom block. Default (no entry) means natural
# abundance. The difference between this absolute mass value and the natural
# abundance value specified in the PTABLE.DAT file must be within the range of -18
# to +12
#
# Notes:
# . Values correspond to mass numbers...
#
# The line is handed to _ParseCmpdGenericPropertyLine and whatever it returns is
# passed back to the caller unchanged.
#
sub ParseCmpdIsotopePropertyLine {
  my $PropertyLine = shift;

  return _ParseCmpdGenericPropertyLine(Isotope => $PropertyLine);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1059
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Parse a radical property line from the CTAB generic properties block.
#
# Radical property line format:
#
# M RADnn8 aaa vvv ...
#
# nn8: Number of value pairs. Maximum of 8 pairs allowed.
# aaa: Atom number
# vvv: Default of 0 = no radical, 1 = singlet, 2 = doublet, 3 = triplet. When
# present, this property supersedes all charge and radical values in the atom block,
# forcing a 0 (zero) charge and radical on all atoms not listed in an M CHG or
# M RAD line.
#
# The line is handed to _ParseCmpdGenericPropertyLine and whatever it returns is
# passed back to the caller unchanged.
#
sub ParseCmpdRadicalPropertyLine {
  my $PropertyLine = shift;

  return _ParseCmpdGenericPropertyLine(Radical => $PropertyLine);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1078
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Map the MDL radical value used in SD and MOL files to the internal spin multiplicity
# value used by MayaChemTools.
#
# The values 0, 1, 2 and 3 are compared numerically, in that order, and map onto
# themselves. Any other value produces a warning and an empty string is returned.
#
sub MDLRadicalToInternalSpinMultiplicity {
  my($MDLRadical) = @_;

  # Supported values are tried in ascending order; the first numeric match wins...
  for my $SupportedValue (0, 1, 2, 3) {
    if ($MDLRadical == $SupportedValue) {
      return $SupportedValue;
    }
  }

  # Nothing matched: warn and hand back an empty string...
  carp "Warning: MDLRadicalToInternalSpinMultiplicity: MDL radical value, $MDLRadical, specifed on line M RAD is not supported...";

  return '';
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1097
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Map the internal spin multiplicity value used by MayaChemTools to the MDL radical
# value used in SD and MOL files.
#
# The values 1, 2 and 3 are compared numerically, in that order, and map onto
# themselves; every other value maps to 0.
#
sub InternalSpinMultiplicityToMDLRadical {
  my($InternalSpinMultiplicity) = @_;

  for my $KnownValue (1, 2, 3) {
    return $KnownValue if ($InternalSpinMultiplicity == $KnownValue);
  }

  # Anything else is written out as "no radical"...
  return 0;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1114
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Process a generic CTAB property line.
#
# Arguments:
#   $PropertyName - Name of the property; used only in the warning text.
#   $Line         - Property line: label, property label, count of value pairs and
#                   then the atom number and value pairs, separated by whitespace.
#
# Returns the flat list of fields that follow the count. A warning is issued when
# the count on the line differs from half the number of those fields.
#
sub _ParseCmpdGenericPropertyLine {
  my($PropertyName, $Line) = @_;

  # Peel the three leading fields off; what is left is the value pair data...
  my @Fields = split(' ', $Line);
  my($Label, $PropertyLabel, $ValuesCount) = splice(@Fields, 0, 3);

  my $ValuePairsCount = scalar(@Fields) / 2;
  if ($ValuesCount != $ValuePairsCount) {
    carp "Warning: _ParseCmpdGenericPropertyLine: Number of atom number and $PropertyName value paris specified on $Label $PropertyLabel property line, $ValuePairsCount, does not match expected value of $ValuesCount...";
  }

  return @Fields;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1130
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Generate generic CTAB property lines for charge, isotope and radical properties.
#
# Arguments:
#   $PropertyName          - 'Charge', 'Isotope' or 'Radical', matched case-insensitively.
#   $PropertyValuePairsRef - Reference to a flat array of atom number and property
#                            value pairs.
#
# Returns a list of property lines. Each line starts with the property label and the
# number of pairs it holds, followed by the pairs, with at most 8 pairs on a line.
# An unknown property name produces a warning and an empty list. A trailing element
# without a partner is ignored.
#
# NOTE(review): the label and count literals are reproduced as they appear in this
# view of the file; the MDL CTfile layout writes the prefix with two spaces after
# "M" and a 3-wide count field -- confirm the spacing against the raw source.
#
sub _GenerateCmpdGenericPropertyLines {
  my($PropertyName, $PropertyValuePairsRef) = @_;
  my($Index, $PropertyLabel, $Line, $PropertyCount, $AtomNum, $PropertyValue, @PropertyLines);

  @PropertyLines = ();
  NAME: {
    if ($PropertyName =~ /^Charge$/i) { $PropertyLabel = "M CHG"; last NAME; }
    if ($PropertyName =~ /^Isotope$/i) { $PropertyLabel = "M ISO"; last NAME; }
    if ($PropertyName =~ /^Radical$/i) { $PropertyLabel = "M RAD"; last NAME; }
    carp "Warning: _GenerateCmpdGenericPropertyLines: Unknown property name, $PropertyName, specified...";
    return @PropertyLines;
  }

  # A maximum of 8 property pair values allowed per line...
  $PropertyCount = 0;
  $Line = '';
  for ($Index = 0; $Index < $#{$PropertyValuePairsRef}; $Index += 2) {
    # Flush as soon as the current line already holds 8 pairs, before the next pair
    # is appended; waiting for the count to exceed 8 would put 9 pairs on a line
    # that is labelled as holding 8...
    if ($PropertyCount >= 8) {
      # Setup property line...
      $Line = "${PropertyLabel} 8${Line}";
      push @PropertyLines, $Line;

      $PropertyCount = 0;
      $Line = '';
    }
    $PropertyCount++;
    $AtomNum = $PropertyValuePairsRef->[$Index];
    $PropertyValue = $PropertyValuePairsRef->[$Index + 1];
    $Line .= sprintf " %3i %3i", $AtomNum, $PropertyValue;
  }

  # Emit whatever is left over on a final, possibly shorter, line...
  if ($Line) {
    $Line = "${PropertyLabel} ${PropertyCount}${Line}";
    push @PropertyLines, $Line;
  }
  return @PropertyLines;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1169
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
#
# Read compound data into a string and return its value.
#
# Arguments:
#   $SDFileRef - File handle to read from.
#
# Lines are read up to and including the first line starting with "$$$$". Windows
# and Mac line endings are converted to UNIX ones, and the new line char at the end
# of the "$$$$" line is dropped. When the handle runs out before a "$$$$" line is
# seen, whatever has been read so far is returned.
#
# The line is read into a lexical variable rather than $_, so the caller's $_ -- and
# any array element it may be aliased to -- is left untouched.
#
sub ReadCmpdString {
  my($SDFileRef) = @_;
  my($CmpdString, $Line);

  $CmpdString = "";
  LINE: while (defined($Line = <$SDFileRef>)) {
    # Change Windows and Mac new line char to UNIX...
    $Line =~ s/\r\n?/\n/g;

    if ($Line =~ /^\$\$\$\$/) {
      # Take out any new line char at the end by explicitly removing it instead of using
      # chomp, which might not always work correctly on files generated on a system
      # with a value of input line separator different from the current system...
      $Line =~ s/\n$//g;

      # Doesn't hurt to chomp...
      chomp $Line;

      $CmpdString .= $Line;
      last LINE;
    }

    $CmpdString .= $Line;
  }
  return $CmpdString;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1199
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Find out the number of fragments in a compound and, for a compound with more than
# one fragment, remove all the others besides the largest one.
#
# Arguments:
#   $CmpdLines - Reference to an array of compound lines: lines 0-2 are copied as is,
#                line 3 is the counts line, followed by atom lines, bond lines,
#                property lines up to M END and then the remaining data lines.
#
# Returns (FragmentCount, Fragments, WashedCmpdString). The first two values are
# passed through from GetCmpdFragments. WashedCmpdString is an empty string unless
# the fragment count is greater than 1, in which case it holds the washed compound
# lines joined by new line chars.
#
# NOTE(review): the first line of Fragments is treated as the largest fragment, with
# its atom numbers listed from lowest to highest -- confirm against GetCmpdFragments.
# NOTE(review): the "M CHG", "M RAD", "M ISO", "M END" and "A " literals are
# reproduced as they appear in this view of the file; confirm their spacing against
# the raw source.
#
sub WashCmpd {
  my($CmpdLines) = @_;

  my($FragmentCount, $Fragments) = GetCmpdFragments($CmpdLines);
  unless ($FragmentCount > 1) {
    # Nothing to wash...
    return ($FragmentCount, $Fragments, "");
  }

  # Map old atom numbers of the largest fragment to new, consecutive atom numbers...
  my @FragmentLines = split "\n", $Fragments;
  my @KeptAtomNums = split " ", $FragmentLines[0];
  my %NewAtomNumFor = ();
  @NewAtomNumFor{@KeptAtomNums} = (1 .. scalar(@KeptAtomNums));

  # Header block and counts line; the counts line is rewritten at the end...
  my @WashedLines = @{$CmpdLines}[0 .. 3];
  my($AtomCount, $BondCount, $ChiralFlag) = ParseCmpdCountsLine(@{$CmpdLines}[3]);

  my $FirstBondLineIdx = 4 + $AtomCount;
  my $FirstPropertyLineIdx = $FirstBondLineIdx + $BondCount;
  my $LineIdx;

  # Keep atom lines that belong to the largest fragment...
  my $OldAtomNum = 0;
  for ($LineIdx = 4; $LineIdx < $FirstBondLineIdx; $LineIdx++) {
    $OldAtomNum++;
    push @WashedLines, $CmpdLines->[$LineIdx] if ($NewAtomNumFor{$OldAtomNum});
  }

  # Keep bond lines with both atoms in the largest fragment, renumbering the atoms...
  my $KeptBondCount = 0;
  for ($LineIdx = $FirstBondLineIdx; $LineIdx < $FirstPropertyLineIdx; $LineIdx++) {
    my($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo) = ParseCmpdBondLine(@{$CmpdLines}[$LineIdx]);
    next unless ($NewAtomNumFor{$FirstAtomNum} && $NewAtomNumFor{$SecondAtomNum});

    $KeptBondCount++;
    my $BondLine = GenerateCmpdBondLine($NewAtomNumFor{$FirstAtomNum}, $NewAtomNumFor{$SecondAtomNum}, $BondType, $BondStereo);
    push @WashedLines, $BondLine;
  }

  # Charge, radical, isotope and atom alias property lines are regenerated with the
  # new atom numbers; any other property line ahead of M END is dropped, as its atom
  # numbers might not be valid anymore...
  my $MENDLineIdx = $LineIdx;
  LINE: for ($LineIdx = $FirstPropertyLineIdx; $LineIdx < @{$CmpdLines}; $LineIdx++) {
    my $Line = $CmpdLines->[$LineIdx];

    if ($Line =~ /^M END/i) {
      push @WashedLines, "M END";
      $MENDLineIdx = $LineIdx;
      last LINE;
    }

    my $PropertyKind;
    my @OldValuePairs = ();
    if ($Line =~ /^M CHG/i) {
      $PropertyKind = 'Charge';
      @OldValuePairs = ParseCmpdChargePropertyLine($Line);
    }
    elsif ($Line =~ /^M RAD/i) {
      $PropertyKind = 'Radical';
      @OldValuePairs = ParseCmpdRadicalPropertyLine($Line);
    }
    elsif ($Line =~ /^M ISO/i) {
      $PropertyKind = 'Isotope';
      @OldValuePairs = ParseCmpdIsotopePropertyLine($Line);
    }
    elsif ($Line =~ /^A /i) {
      # The alias itself sits on the following line, which is consumed here...
      $PropertyKind = 'AtomAlias';
      $LineIdx++;
      my $AliasLine = $CmpdLines->[$LineIdx];
      @OldValuePairs = ParseCmpdAtomAliasPropertyLine($Line, $AliasLine);
    }
    else {
      next LINE;
    }
    next LINE unless (@OldValuePairs);

    # Collect values for atoms in the largest fragment under their new atom numbers...
    my @NewValuePairs = ();
    for (my $PairIdx = 0; $PairIdx < $#OldValuePairs; $PairIdx += 2) {
      my($AtomNum, $Value) = @OldValuePairs[$PairIdx, $PairIdx + 1];
      next unless (exists $NewAtomNumFor{$AtomNum});
      push @NewValuePairs, ($NewAtomNumFor{$AtomNum}, $Value);
    }
    next LINE unless (@NewValuePairs);

    my @NewPropertyLines = ();
    if ($PropertyKind eq 'Charge') {
      @NewPropertyLines = GenerateCmpdChargePropertyLines(\@NewValuePairs);
    }
    elsif ($PropertyKind eq 'Radical') {
      @NewPropertyLines = GenerateCmpdRadicalPropertyLines(\@NewValuePairs);
    }
    elsif ($PropertyKind eq 'Isotope') {
      @NewPropertyLines = GenerateCmpdIsotopePropertyLines(\@NewValuePairs);
    }
    else {
      @NewPropertyLines = GenerateCmpdAtomAliasPropertyLines(\@NewValuePairs);
    }
    push @WashedLines, @NewPropertyLines;
  }

  # Copy over everything after the M END line: data labels and values...
  for ($LineIdx = 1 + $MENDLineIdx; $LineIdx < @{$CmpdLines}; $LineIdx++) {
    push @WashedLines, $CmpdLines->[$LineIdx];
  }

  # Update atom and bond count line...
  $WashedLines[3] = GenerateCmpdCountsLine(scalar(@KeptAtomNums), $KeptBondCount, $ChiralFlag);

  my $WashedCmpdString = join "\n", @WashedLines;
  return ($FragmentCount, $Fragments, $WashedCmpdString);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1324
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1325 1;
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1326
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1327 __END__
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1328
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1329 =head1 NAME
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1330
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1331 SDFileUtil
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1332
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1333 =head1 SYNOPSIS
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1334
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1335 use SDFileUtil ;
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1336
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1337 use SDFileUtil qw(:all);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1338
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1339 =head1 DESCRIPTION
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1340
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1341 B<SDFileUtil> module provides the following functions:
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1342
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1343 GenerateCmpdAtomAliasPropertyLines, GenerateCmpdAtomLine, GenerateCmpdBondLine,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1344 GenerateCmpdChargePropertyLines, GenerateCmpdCommentsLine, GenerateCmpdCountsLine,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1345 GenerateCmpdDataHeaderLabelsAndValuesLines, GenerateCmpdIsotopePropertyLines,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1346 GenerateCmpdMiscInfoLine, GenerateCmpdMolNameLine,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1347 GenerateCmpdRadicalPropertyLines, GenerateEmptyCtabBlockLines,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1348 GenerateMiscLineDateStamp, GetAllAndCommonCmpdDataHeaderLabels,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1349 GetCmpdDataHeaderLabels, GetCmpdDataHeaderLabelsAndValues, GetCmpdFragments,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1350 GetCtabLinesCount, GetInvalidAtomNumbers, GetUnknownAtoms,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1351 InternalBondOrderToMDLBondType, InternalBondStereochemistryToMDLBondStereo,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1352 InternalChargeToMDLCharge, InternalSpinMultiplicityToMDLRadical, IsCmpd2D,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1353 IsCmpd3D, MDLBondStereoToInternalBondStereochemistry,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1354 MDLBondTypeToInternalBondOrder, MDLChargeToInternalCharge,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1355 MDLRadicalToInternalSpinMultiplicity, ParseCmpdAtomAliasPropertyLine,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1356 ParseCmpdAtomLine, ParseCmpdBondLine, ParseCmpdChargePropertyLine,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1357 ParseCmpdCommentsLine, ParseCmpdCountsLine, ParseCmpdIsotopePropertyLine,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1358 ParseCmpdMiscInfoLine, ParseCmpdMolNameLine, ParseCmpdRadicalPropertyLine,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1359 ReadCmpdString, RemoveCmpdDataHeaderLabelAndValue, WashCmpd
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1360
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1361 =head1 METHODS
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1362
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1363 =over 4
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1364
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1365 =item B<GenerateCmpdAtomAliasPropertyLines>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1366
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1367 @Lines = GenerateCmpdAtomAliasPropertyLines($AliasValuePairsRef);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1368
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1369 Returns formatted atom alias property lines corresponding to successive pairs
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1370 of atom number and alias values specified by a reference to an array. Two lines
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1371 are generated for each atom number and alias value pair: First line - A <AtomNum>;
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1372 Second line:<AtomAlias>.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1373
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1374 =item B<GenerateCmpdAtomLine>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1375
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1376 $Line = GenerateCmpdAtomLine($AtomSymbol, $AtomX, $AtomY,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1377 $AtomZ, [$MassDifference, $Charge, $StereoParity]);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1378
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1379 Returns a formatted atom data line containing all the input values.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1380
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1381 =item B<GenerateCmpdBondLine>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1382
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1383 $Line = GenerateCmpdBondLine($FirstAtomNum, $SecondAtomNum,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1384 $BondType, [$BondStereo]);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1385
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1386 Returns a formatted bond data line containing all the input values.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1387
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1388 =item B<GenerateCmpdChargePropertyLines>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1389
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1390 @Lines = GenerateCmpdChargePropertyLines($ChargeValuePairsRef);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1391
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1392 Returns formatted M CHG property lines corresponding to successive pairs of
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1393 atom number and charge values specified by a reference to an array.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1394
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1395 =item B<GenerateCmpdCommentsLine>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1396
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1397 $Line = GenerateCmpdCommentsLine($Comments);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1398
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1399 Returns a formatted comments data line.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1400
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1401 =item B<GenerateCmpdCountsLine>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1402
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1403 $Line = GenerateCmpdCountsLine($AtomCount, $BondCount,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1404 $ChiralFlag, [$PropertyCount, $Version]);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1405
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1406 Returns a formatted line containing all the input values. The default values of 999
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1407 and V2000 are used for I<PropertyCount> and I<Version>.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1408
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1409 =item B<GenerateCmpdDataHeaderLabelsAndValuesLines>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1410
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1411 @Lines = GenerateCmpdDataHeaderLabelsAndValuesLines(
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1412 $DataHeaderLabelsRef, $DataHeaderLabelsAndValuesRef,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1413 [$SortDataLabels]);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1414
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1415 Returns formatted data lines containing header label and values lines corresponding to
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1416 all data header labels in array reference I<DataHeaderLabelsRef> with values in hash
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1417 reference I<DataHeaderLabelsAndValuesRef>. By default, data header labels are
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1418 not sorted and correspond to the label order in array reference I<DataHeaderLabelsRef>.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1419
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1420 =item B<GenerateCmpdIsotopePropertyLines>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1421
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1422 @Lines = GenerateCmpdIsotopePropertyLines($IsotopeValuePairsRef);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1423
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1424 Returns formatted M ISO property lines corresponding to successive pairs of
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1425 atom number and isotope values specified by a reference to an array.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1426
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1427 =item B<GenerateCmpdMiscInfoLine>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1428
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1429 $Line = GenerateCmpdMiscInfoLine([$ProgramName, $UserInitial,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1430 $Code]);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1431
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1432 Returns a formatted line containing specified user initial, program name, date and code.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1433 Default values are: I<ProgramName - MayaChem; UserInitial - NULL; Code - 2D>.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1434
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1435 =item B<GenerateCmpdMolNameLine>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1436
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1437 $Line = GenerateCmpdMolNameLine($MolName);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1438
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1439 Returns a formatted molecule name data line.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1440
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1441 =item B<GenerateCmpdRadicalPropertyLines>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1442
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1443 @Lines = GenerateCmpdRadicalPropertyLines($RadicalValuePairsRef);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1444
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1445 Returns formatted M RAD property lines corresponding to successive pairs of
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1446 atom number and multiplicity values specified by a reference to an array.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1447
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1448 =item B<GenerateEmptyCtabBlockLines>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1449
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1450 $Lines = GenerateEmptyCtabBlockLines([$Date]);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1451
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1452 Returns formatted lines representing empty CTAB block.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1453
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1454 =item B<GenerateMiscLineDateStamp>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1455
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1456 $Line = GenerateMiscLineDateStamp();
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1457
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1458 Returns date stamp for misc line.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1459
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1460 =item B<GetAllAndCommonCmpdDataHeaderLabels>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1461
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1462 ($CmpdCount, $DataFieldLabelsArrayRef,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1463 $CommonDataFieldLabelsArrayRef) =
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1464 GetAllAndCommonCmpdDataHeaderLabels(\*SDFILE);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1465
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1466 Returns number of compounds, a reference to an array containing all unique data header
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1467 labels and a reference to an array containing common data field labels for all compounds
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1468 in SD file.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1469
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1470 =item B<GetCmpdDataHeaderLabels>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1471
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1472 (@Labels) = GetCmpdDataHeaderLabels(\@CmpdLines);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1473
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1474 Returns an array containing data header labels for a compound.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1475
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1476 =item B<GetCmpdDataHeaderLabelsAndValues>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1477
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1478 (%DataValues) = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1479
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1480 Returns a hash containing data header labels and values for a compound.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1481
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1482 =item B<GetCmpdFragments>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1483
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1484 ($FragmentCount, $FragmentString) = GetCmpdFragments(\@CmpdLines);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1485
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1486 Figures out the number of disconnected fragments and returns their values along
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1487 with the atom numbers in a string delimited by new line character. Fragment data
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1488 in B<FragmentString> is sorted based on its size.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1489
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1490 =item B<GetCtabLinesCount>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1491
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1492 $CtabLinesCount = GetCtabLinesCount(\@CmpdLines);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1493
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1494 Returns number of lines present between the 4th line and the line containing "M END".
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1495
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1496 =item B<GetInvalidAtomNumbers>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1497
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1498 ($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines) =
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1499 GetInvalidAtomNumbers(\@CmpdLines);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1500
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1501 Returns a list of values containing information about invalid atom numbers present
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1502 in block or atom property lines.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1503
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1504 =item B<GetUnknownAtoms>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1505
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1506 ($UnknownAtomCount, $UnknownAtoms, $UnknownAtomLines) =
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1507 GetUnknownAtoms(\@CmpdLines);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1508
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1509 Returns a list of values containing information about atoms which contain special element
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1510 symbols not present in the periodic table.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1511
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1512 =item B<InternalBondOrderToMDLBondType>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1513
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1514 $MDLBondType = InternalBondOrderToMDLBondType($InternalBondOrder);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1515
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1516 Returns value of I<MDLBondType> corresponding to I<InternalBondOrder>.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1517
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1518 InternalBondOrder MDLBondType
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1519
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1520 1 1
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1521 2 2
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1522 3 3
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1523 1.5 4
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1524
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1525 =item B<InternalBondStereochemistryToMDLBondStereo>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1526
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1527 $MDLBondStereo = InternalBondStereochemistryToMDLBondStereo(
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1528 $InternalBondStereo);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1529
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1530 Returns value of I<MDLBondStereo> corresponding to I<InternalBondStereo> using following
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1531 mapping:
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1532
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1533 InternalBondStereo MDLBondStereo
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1534
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1535 Up 1
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1536 UpOrDown 4
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1537 Down 6
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1538 CisOrTrans 3
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1539 Other 0
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1540
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1541 =item B<InternalChargeToMDLCharge>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1542
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1543 $MDLCharge = InternalChargeToMDLCharge($InternalCharge);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1544
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1545 Returns value of I<MDLCharge> corresponding to I<InternalCharge> using following
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1546 mapping:
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1547
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1548 InternalCharge MDLCharge
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1549
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1550 3 1
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1551 2 2
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1552 1 3
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1553 -1 5
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1554 -2 6
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1555 -3 7
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1556
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1557 =item B<InternalSpinMultiplicityToMDLRadical>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1558
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1559 $MDLRadical = InternalSpinMultiplicityToMDLRadical(
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1560 $InternalSpinMultiplicity);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1561
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1562 Returns value of I<MDLRadical> corresponding to I<InternalSpinMultiplicity>. These
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1563 values are equivalent.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1564
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1565 =item B<MDLBondStereoToInternalBondType>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1566
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1567 $InternalBondType = MDLBondStereoToInternalBondType($MDLBondStereo);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1568
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1569 Returns value of I<InternalBondType> corresponding to I<MDLBondStereo> using
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1570 mapping shown for B<InternalBondTypeToMDLBondStereo> function.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1571
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1572 =item B<IsCmpd2D>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1573
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1574 $Status = IsCmpd2D();
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1575
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1576 Returns 1 or 0 based on whether z-coordinates of all atoms are zero.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1577
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1578 =item B<IsCmpd3D>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1579
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1580 $Status = IsCmpd3D();
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1581
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1582 Returns 1 or 0 based on whether z-coordinate of any atom is non-zero.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1583
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1584 =item B<MDLBondStereoToInternalBondStereochemistry>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1585
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1586 $InternalBondStereo = MDLBondStereoToInternalBondStereochemistry(
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1587 $MDLBondStereo);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1588
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1589 Returns value of I<InternalBondStereo> corresponding to I<MDLBondStereo> using
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1590 mapping shown for B<InternalBondStereochemistryToMDLBondStereo> function.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1591
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1592 =item B<MDLBondTypeToInternalBondOrder>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1593
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1594 $InternalBondOrder = MDLBondTypeToInternalBondOrder($MDLBondType);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1595
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1596 Returns value of I<InternalBondOrder> corresponding to I<MDLBondType> using
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1597 mapping shown for B<InternalBondOrderToMDLBondType> function.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1598
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1599 =item B<MDLChargeToInternalCharge>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1600
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1601 $InternalCharge = MDLChargeToInternalCharge($MDLCharge);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1602
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1603 Returns value of I<InternalCharge> corresponding to I<MDLCharge> using
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1604 mapping shown for B<InternalChargeToMDLCharge> function.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1605
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1606 =item B<MDLRadicalToInternalSpinMultiplicity>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1607
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1608 $InternalSpinMultiplicity = MDLRadicalToInternalSpinMultiplicity(
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1609 $MDLRadical);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1610
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1611 Returns value of I<InternalSpinMultiplicity> corresponding to I<MDLRadical>. These
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1612 values are equivalent.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1613
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1614 =item B<ParseCmpdAtomAliasPropertyLine>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1615
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1616 @AtomNumAndValuePairs = ParseCmpdAtomAliasPropertyLine(
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1617 $CurrentLine, $NextLine);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1618
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1619 Parses atom alias property lines in CTAB generic properties block and returns an array
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1620 with successive pairs of values corresponding to atom number and its alias.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1621
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1622 =item B<ParseCmpdAtomLine>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1623
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1624 ($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1625 $StereoParity) = ParseCmpdAtomLine($AtomDataLine);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1626
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1627 Parses compound data line containing atom information and returns a list
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1628 of values.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1629
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1630 =item B<ParseCmpdBondLine>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1631
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1632 ($FirstAtomNum, $SecondAtomNum, $BondType) =
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1633 ParseCmpdBondLine($BondDataLine);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1634
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1635 Parses compound data line containing bond information and returns a list of
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1636 values.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1637
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1638 =item B<ParseCmpdCommentsLine>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1639
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1640 $Comments = ParseCmpdCommentsLine($CommentsDataLine);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1641
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1642 Returns the comment string.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1643
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1644 =item B<ParseCmpdChargePropertyLine>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1645
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1646 @AtomNumAndValuePairs = ParseCmpdChargePropertyLine(
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1647 $ChargeDataLine);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1648
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1649 Parses charge property line in CTAB generic properties block and returns an array
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1650 with successive pairs of values corresponding to atom number and its charge.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1651
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1652 =item B<ParseCmpdCountsLine>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1653
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1654 ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version) =
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1655 ParseCmpdCountsLine(\@CountDataLines);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1656
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1657 Returns a list of values containing count information.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1658
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1659 =item B<ParseCmpdMiscInfoLine>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1660
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1661 ($UserInitial, $ProgramName, $Date, $Code, $ScalingFactor1, $ScalingFactor2,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1662 $Energy, $RegistryNum) = ParseCmpdMiscInfoLine($Line);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1663
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1664 Returns a list of values containing miscellaneous information.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1665
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1666 =item B<ParseCmpdIsotopePropertyLine>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1667
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1668 @AtomNumAndValuePairs = ParseCmpdIsotopePropertyLine(
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1669 $IsotopeDataLine);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1670
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1671 Parses isotopic property line in CTAB generic properties block and returns an array
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1672 with successive pairs of values corresponding to atom number and absolute mass of
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1673 atom isotope.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1674
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1675 =item B<ParseCmpdMolNameLine>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1676
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1677 $MolName = ParseCmpdMolNameLine($Line);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1678
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1679 Returns a string containing molecule name.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1680
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1681 =item B<ParseCmpdRadicalPropertyLine>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1682
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1683 @AtomNumAndValuePairs = ParseCmpdRadicalPropertyLine(
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1684 $RadicalDataLine);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1685
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1686 Parses radical property line in CTAB generic properties block and returns an array
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1687 with successive pairs of values corresponding to atom number and radical number
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1688 value.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1689
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1690 =item B<RemoveCmpdDataHeaderLabelAndValue>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1691
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1692 $NewCmpdString = RemoveCmpdDataHeaderLabelAndValue($CmpdString,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1693 $DataHeaderLabel);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1694
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1695 Returns a B<NewCmpdString> after removing I<DataHeaderLabel> along with its
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1696 value from I<CmpdString>.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1697
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1698 =item B<ReadCmpdString>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1699
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1700 $CmpdString = ReadCmpdString(\*SDFILEHANDLE);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1701
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1702 Returns a string containing all the data lines for the next available compound
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1703 in an already open file indicated by SDFILEHANDLE. A NULL string is returned
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1704 on EOF.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1705
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1706 =item B<WashCmpd>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1707
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1708 ($FragmentCount, $Fragments, $WashedCmpdString) =
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1709 WashCmpd(\@CmpdLines);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1710
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1711 Figures out the number of disconnected fragments and returns their values along
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1712 with the atom numbers in a string delimited by new line character. Fragment data
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1713 in B<FragmentString> is sorted based on its size.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1714
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1715 =back
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1716
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1717 =head1 AUTHOR
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1718
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1719 Manish Sud <msud@san.rr.com>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1720
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1721 =head1 SEE ALSO
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1722
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1723 TextUtil.pm
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1724
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1725 =head1 COPYRIGHT
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1726
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1727 Copyright (C) 2015 Manish Sud. All rights reserved.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1728
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1729 This file is part of MayaChemTools.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1730
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1731 MayaChemTools is free software; you can redistribute it and/or modify it under
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1732 the terms of the GNU Lesser General Public License as published by the Free
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1733 Software Foundation; either version 3 of the License, or (at your option)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1734 any later version.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1735
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1736 =cut