annotate lib/SDFileUtil.pm @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1 package SDFileUtil;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
2 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
3 # $RCSfile: SDFileUtil.pm,v $
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
4 # $Date: 2015/02/28 20:47:18 $
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
5 # $Revision: 1.49 $
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
6 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
7 # Author: Manish Sud <msud@san.rr.com>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
8 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
10 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
11 # This file is part of MayaChemTools.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
12 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
14 # the terms of the GNU Lesser General Public License as published by the Free
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
15 # Software Foundation; either version 3 of the License, or (at your option) any
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
16 # later version.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
17 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
18 # MayaChemTools is distributed in the hope that it will be useful, but without
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
19 # any warranty; without even the implied warranty of merchantability of fitness
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
20 # for a particular purpose. See the GNU Lesser General Public License for more
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
21 # details.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
22 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
23 # You should have received a copy of the GNU Lesser General Public License
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
26 # Boston, MA, 02111-1307, USA.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
27 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
28
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
29 use strict;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
30 use Exporter;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
31 use Carp;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
32 use PeriodicTable qw(IsElement);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
33 use TimeUtil ();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
34
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Package globals consumed by Exporter. Every public routine of this module is
# exported by default; nothing is offered as an optional (@EXPORT_OK) import.
our (@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

@ISA = qw(Exporter);

@EXPORT = qw(
  GenerateCmpdAtomLine
  GenerateCmpdBondLine
  GenerateCmpdChargePropertyLines
  GenerateCmpdCommentsLine
  GenerateCmpdCountsLine
  GenerateCmpdAtomAliasPropertyLines
  GenerateCmpdIsotopePropertyLines
  GenerateCmpdDataHeaderLabelsAndValuesLines
  GenerateCmpdMiscInfoLine
  GenerateCmpdRadicalPropertyLines
  GenerateCmpdMolNameLine
  GenerateEmptyCtabBlockLines
  GenerateMiscLineDateStamp
  GetAllAndCommonCmpdDataHeaderLabels
  GetCmpdDataHeaderLabels
  GetCmpdDataHeaderLabelsAndValues
  GetCmpdFragments
  GetCtabLinesCount
  GetUnknownAtoms
  GetInvalidAtomNumbers
  MDLChargeToInternalCharge
  InternalChargeToMDLCharge
  MDLBondTypeToInternalBondOrder
  InternalBondOrderToMDLBondType
  MDLBondStereoToInternalBondStereochemistry
  InternalBondStereochemistryToMDLBondStereo
  InternalSpinMultiplicityToMDLRadical
  MDLRadicalToInternalSpinMultiplicity
  IsCmpd3D
  IsCmpd2D
  ParseCmpdAtomLine
  ParseCmpdBondLine
  ParseCmpdCommentsLine
  ParseCmpdCountsLine
  ParseCmpdMiscInfoLine
  ParseCmpdMolNameLine
  ParseCmpdAtomAliasPropertyLine
  ParseCmpdChargePropertyLine
  ParseCmpdIsotopePropertyLine
  ParseCmpdRadicalPropertyLine
  ReadCmpdString
  RemoveCmpdDataHeaderLabelAndValue
  WashCmpd
);

@EXPORT_OK = ();

# The ":all" tag is simply everything exportable.
%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
41
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Format the counts line of a V2000 connection table.
#
# Call forms:
#   GenerateCmpdCountsLine($AtomCount, $BondCount)
#   GenerateCmpdCountsLine($AtomCount, $BondCount, $ChiralFlag)
#   GenerateCmpdCountsLine($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version)
#
# Any argument count other than 3 or 5 is handled like the two-argument form:
# only the first two values are used. Missing values default to a chiral flag
# of 0, a property count of 999 and the version string "V2000".
#
# Returns the formatted line without a trailing newline. Croaks when the atom
# count exceeds 999.
sub GenerateCmpdCountsLine {
  my($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version);

  my $ArgCount = scalar @_;
  if ($ArgCount == 5) {
    ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version) = @_;
  }
  else {
    ($AtomCount, $BondCount) = @_;
    $ChiralFlag = ($ArgCount == 3) ? $_[2] : 0;
    ($PropertyCount, $Version) = (999, "V2000");
  }

  if ($AtomCount > 999) {
    croak "Error: SDFileUtil::GenerateCmpdCountsLine: The atom count, $AtomCount, exceeds maximum of 999 allowed for CTAB version 2000. The Extended Connection Table (V3000) format in MDL MOL and SD files is not supported by the current release of MayaChemTools...";
  }

  # Eleven 3-column integer fields followed by the 6-column version field.
  my @CountFields = ($AtomCount, $BondCount, 0, 0, $ChiralFlag, 0, 0, 0, 0, 0, $PropertyCount);
  my $CountsLine = sprintf(("%3i" x 11) . "%6s", @CountFields, $Version);

  return ($CountsLine);
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
67
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Generate the comments line of a compound header.
#
# Returns $Comments unchanged when it is at most 80 characters long; otherwise
# returns only its first 80 characters.
sub GenerateCmpdCommentsLine {
  my($Comments) = @_;

  if (length($Comments) > 80) {
    return substr($Comments, 0, 80);
  }

  return $Comments;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
77
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Generate the molecule name line of a compound header.
#
# Returns $MolName unchanged when it is at most 80 characters long; otherwise
# returns only its first 80 characters.
sub GenerateCmpdMolNameLine {
  my($MolName) = @_;

  if (length($MolName) > 80) {
    return substr($MolName, 0, 80);
  }

  return $MolName;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
87
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Generate the misc info line (second header line) of a compound.
#
# Arguments (all optional):
#   $ProgramName - program name; defaults to "MayaChem"; occupies 8 columns.
#   $UserInitial - user initials; defaults to two blanks; occupies 2 columns.
#   $Code        - dimensional code; defaults to "2D"; occupies 2 columns.
#
# An undefined or false argument is replaced by its default. Each value is
# truncated to its column width and, when shorter, padded on the right with
# blanks so that the date stamp and the code always start in their fixed
# columns.
#
# Returns the line "<initials><program><date stamp><code>" without a trailing
# newline; the date stamp comes from GenerateMiscLineDateStamp().
sub GenerateCmpdMiscInfoLine {
  my($ProgramName, $UserInitial, $Code) = @_;
  my($Date, $Line);

  if (!(defined($ProgramName) && $ProgramName)) {
    $ProgramName = "MayaChem";
  }
  if (!(defined($UserInitial) && $UserInitial)) {
    # Two blanks: the initials field is two columns wide.
    $UserInitial = " " x 2;
  }
  if (!(defined($Code) && $Code)) {
    $Code = "2D";
  }

  $Date = GenerateMiscLineDateStamp();

  # "%-W.Ws" left-justifies, pads to W columns and truncates to W characters.
  $Line = sprintf "%-2.2s%-8.8s%s%-2.2s", $UserInitial, $ProgramName, $Date, $Code;

  return $Line;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
118
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Generate the lines of an empty structure (CTAB) block.
#
# Takes an optional date stamp string; when exactly one argument is not
# supplied, the stamp is obtained from GenerateMiscLineDateStamp().
#
# Returns a single string holding five newline-separated lines, with no
# trailing newline:
#   1. blank molname line
#   2. misc info line: two blank initials columns, "MayaChem", date stamp, "2D"
#   3. blank comments line
#   4. counts line for zero atoms and zero bonds
#   5. the "M  END" terminator (M, two blanks, END)
sub GenerateEmptyCtabBlockLines {
  my($Date);

  if (@_ == 1) {
    ($Date) = @_;
  }
  else {
    $Date = GenerateMiscLineDateStamp();
  }

  my @BlockLines = (
    "",
    (" " x 2) . "MayaChem${Date}2D",
    "",
    GenerateCmpdCountsLine(0, 0, 0),
    "M" . (" " x 2) . "END",
  );

  return join("\n", @BlockLines);
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
141
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Generate the date stamp used in the misc info line of SD files.
#
# Thin wrapper: hands back whatever TimeUtil::SDFileTimeStamp() returns. The
# call is the operand of return so the caller's context is passed through.
sub GenerateMiscLineDateStamp {
  return TimeUtil::SDFileTimeStamp();
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
146
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Generate an atom line for the atom block of a V2000 connection table.
#
# Arguments:
#   $AtomSymbol           - atom symbol, left-justified in 3 columns
#   $AtomX, $AtomY, $AtomZ - coordinates, each written as %10.4f
#   $MassDifference       - optional, 2-column integer, defaults to 0
#   $Charge               - optional, 3-column integer, defaults to 0
#   $StereoParity         - optional, 3-column integer, defaults to 0
#
# The remaining nine fields of the line are always written as 0, each in its
# own 3-column field, which gives the fixed 69-character atom line.
#
# Returns the formatted line without a trailing newline.
sub GenerateCmpdAtomLine {
  my($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity) = @_;

  $MassDifference = 0 unless defined $MassDifference;
  $Charge = 0 unless defined $Charge;
  $StereoParity = 0 unless defined $StereoParity;

  # Build the constant tail programmatically so each zero keeps its full
  # 3-column width.
  my $Format = "%10.4f%10.4f%10.4f %-3s%2i%3i%3i" . ("%3i" x 9);

  return sprintf $Format, $AtomX, $AtomY, $AtomZ, $AtomSymbol, $MassDifference, $Charge, $StereoParity, (0) x 9;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
166
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Generate a bond line for the bond block of a V2000 connection table.
#
# Arguments:
#   $FirstAtomNum, $SecondAtomNum - atom numbers of the bonded atoms
#   $BondType                     - bond type code
#   $BondStereo                   - optional bond stereo code, defaults to 0
#
# Every field is a 3-column integer. The last three fields of the line are
# always written as 0, giving the fixed 21-character bond line.
#
# Returns the formatted line without a trailing newline.
sub GenerateCmpdBondLine {
  my($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo) = @_;

  $BondStereo = 0 unless defined $BondStereo;

  # Seven 3-column fields: four supplied values plus three constant zeros.
  return sprintf "%3i" x 7, $FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo, (0) x 3;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
180
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Generate charge property lines for a CTAB block.
#
# Forwards its first argument, together with the property name 'Charge', to
# _GenerateCmpdGenericPropertyLines and returns that routine's result. The
# argument is presumably a reference to a flat list of atom number / charge
# value pairs - TODO confirm against the helper, which is not shown here.
sub GenerateCmpdChargePropertyLines {
  my $ChargeValuePairsRef = shift;

  return _GenerateCmpdGenericPropertyLines('Charge', $ChargeValuePairsRef);
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
188
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Generate isotope property lines for a CTAB block.
#
# Forwards its first argument, together with the property name 'Isotope', to
# _GenerateCmpdGenericPropertyLines and returns that routine's result. The
# argument is presumably a reference to a flat list of atom number / isotope
# value pairs - TODO confirm against the helper, which is not shown here.
sub GenerateCmpdIsotopePropertyLines {
  my $IsotopeValuePairsRef = shift;

  return _GenerateCmpdGenericPropertyLines('Isotope', $IsotopeValuePairsRef);
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
196
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Generate radical property lines for a CTAB block.
#
# Forwards its first argument, together with the property name 'Radical', to
# _GenerateCmpdGenericPropertyLines and returns that routine's result. The
# argument is presumably a reference to a flat list of atom number / radical
# value pairs - TODO confirm against the helper, which is not shown here.
sub GenerateCmpdRadicalPropertyLines {
  my $RadicalValuePairsRef = shift;

  return _GenerateCmpdGenericPropertyLines('Radical', $RadicalValuePairsRef);
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
204
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Generate atom alias property lines for a CTAB block.
#
# Atom alias property format (two lines per alias):
#
#   A  aaa
#   x...
#
#   aaa: atom number, right-justified in 3 columns after "A" and two blanks
#   x:   atom alias text, on the following line
#
# Argument:
#   $PropertyValuePairsRef - reference to a flat array of
#                            (atom number, alias, atom number, alias, ...).
#                            A trailing atom number without an alias is ignored.
#
# Returns a list with two entries per pair: the "A  aaa" line followed by the
# alias text. The list is empty when no complete pair is supplied.
sub GenerateCmpdAtomAliasPropertyLines {
  my($PropertyValuePairsRef) = @_;
  my($Index, $AtomNum, $AtomAlias, @PropertyLines);

  @PropertyLines = ();

  # Stop before the last index so that $Index + 1 is always a valid element.
  for ($Index = 0; $Index < $#{$PropertyValuePairsRef}; $Index += 2) {
    ($AtomNum, $AtomAlias) = @{$PropertyValuePairsRef}[$Index, $Index + 1];

    push @PropertyLines, "A" . (" " x 2) . sprintf("%3i", $AtomNum);
    push @PropertyLines, $AtomAlias;
  }

  return @PropertyLines;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
233
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Generate data header label and value lines for a compound.
#
# Arguments:
#   $DataHeaderLabelsRef          - reference to an array of data field labels
#   $DataHeaderLabelsAndValuesRef - reference to a hash mapping label to value
#   $SortDataLabels               - optional; when true the labels are emitted
#                                   in default (string) sort order, otherwise
#                                   in the order given
#
# Returns a list with three entries per label: the header line, the value
# (an empty string when the hash has no entry for the label) and an empty
# string for the blank separator line.
sub GenerateCmpdDataHeaderLabelsAndValuesLines {
  my($DataHeaderLabelsRef, $DataHeaderLabelsAndValuesRef, $SortDataLabels) = @_;

  my @OrderedLabels = $SortDataLabels
    ? (sort @{$DataHeaderLabelsRef})
    : @{$DataHeaderLabelsRef};

  my @DataLines = ();
  for my $DataLabel (@OrderedLabels) {
    my $DataValue = exists $DataHeaderLabelsAndValuesRef->{$DataLabel}
      ? $DataHeaderLabelsAndValuesRef->{$DataLabel}
      : '';

    push @DataLines, "> <${DataLabel}>";
    push @DataLines, "$DataValue";
    push @DataLines, "";
  }

  return @DataLines;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
261
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Parse the data field headers of every compound in an SD file and collect
# all data field labels as well as the labels common to all compounds.
#
# Argument:
#   $SDFileRef - passed unchanged to ReadCmpdString(), which is called
#                repeatedly until it returns a false value
#
# Returns a three element list:
#   - number of compounds read
#   - reference to an array of all labels, in order of first appearance
#   - reference to an array of the labels present in every compound
#
# The first compound is identified by the compound counter rather than by
# whether any labels have been collected so far. Otherwise, a file whose
# leading compounds have no data fields would treat a later compound as the
# initial one and report its labels as common to all compounds.
sub GetAllAndCommonCmpdDataHeaderLabels {
  my($SDFileRef) = @_;
  my($CmpdCount, $CmpdString, $Label, @CmpdLines, @DataFieldLabels, @CommonDataFieldLabels, %DataFieldLabelsMap);

  $CmpdCount = 0;
  @DataFieldLabels = ();
  @CommonDataFieldLabels = ();
  %DataFieldLabelsMap = ();

  while ($CmpdString = ReadCmpdString($SDFileRef)) {
    $CmpdCount++;
    @CmpdLines = split "\n", $CmpdString;

    my(@CmpdDataFieldLabels) = GetCmpdDataHeaderLabels(\@CmpdLines);

    if ($CmpdCount == 1) {
      # Initial label set: everything is common until proven otherwise.
      @DataFieldLabels = @CmpdDataFieldLabels;
      for $Label (@DataFieldLabels) {
        $DataFieldLabelsMap{$Label} = "PresentInAll";
      }
      next;
    }

    # Lookup table of the labels carried by the current compound.
    my(%CmpdDataFieldLabelsMap) = ();
    for $Label (@CmpdDataFieldLabels) {
      $CmpdDataFieldLabelsMap{$Label} = "PresentInSome";
    }

    # Known labels missing from this compound can no longer be common.
    for $Label (@DataFieldLabels) {
      if (!$CmpdDataFieldLabelsMap{$Label}) {
        $DataFieldLabelsMap{$Label} = "PresentInSome";
      }
    }

    # Labels first seen after the initial compound cannot be common either.
    for $Label (@CmpdDataFieldLabels) {
      if (!$DataFieldLabelsMap{$Label}) {
        push @DataFieldLabels, $Label;
        $DataFieldLabelsMap{$Label} = "PresentInSome";
      }
    }
  }

  # Identify the common data field labels, keeping first-appearance order.
  @CommonDataFieldLabels = grep { $DataFieldLabelsMap{$_} eq "PresentInAll" } @DataFieldLabels;

  return ($CmpdCount, \@DataFieldLabels, \@CommonDataFieldLabels);
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
317
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Parse all the data header labels of a compound and return them as a list.
#
# Format of a data item:
#
#   > Data header line
#   Data line(s)
#   Blank line
#
# [Data Header] (one line) precedes each item of data, starts with a greater
# than (>) sign, and contains at least one of the following:
#   - The field name enclosed in angle brackets. For example: <melting.point>
#   - The field number, DTn, where n represents the number assigned to the
#     field in a MACCS-II database
#
# Optional information for the data header includes:
#   - The compound's external and internal registry numbers. External registry
#     numbers must be enclosed in parentheses.
#   - Any combination of information
#
# The following are examples of valid data headers:
#   > <MELTING.POINT>
#   > 55 (MD-08974) <BOILING.POINT> DT12
#   > DT12 55
#   > (MD-0894) <BOILING.POINT> FROM ARCHIVES
#
# Notes: Sometimes the last blank line is missing and the data can be followed
# directly by $$$$.
#
# Argument:
#   $CmpdLines - reference to an array of the compound's lines
#
# Returns the labels in order of appearance. Only lines starting with ">" are
# considered; of those, lines without an angle bracketed name are skipped, and
# only the first bracketed name on a line is used.
sub GetCmpdDataHeaderLabels {
  my($CmpdLines) = @_;
  my @FoundLabels = ();

  for my $HeaderCandidate (@{$CmpdLines}) {
    next unless $HeaderCandidate =~ /^>/;

    # Grab the first bracketed field name, brackets included.
    next unless $HeaderCandidate =~ /(<.*?>)/;
    my $FieldName = $1;

    # Drop every angle bracket from the captured text.
    $FieldName =~ tr/<>//d;

    push @FoundLabels, $FieldName;
  }

  return (@FoundLabels);
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
362
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
363 # Parse all the data header labels and values
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
sub GetCmpdDataHeaderLabelsAndValues {
  # Parse data header labels along with their values from compound lines.
  #
  # Parameter: reference to an array of compound lines.
  # Returns: hash (as a flat list) mapping each label to its value; lines of a
  # multi-line value are joined by "\n" and a label without any value lines
  # maps to an empty string.
  #
  # Parsing stops at the first line starting with "$$$$". A blank line ends
  # the value of the current label. Lines which don't belong to a labeled
  # data item, including data items whose header has no <label>, are ignored.
  my($CmpdLines) = @_;
  my($CmpdLine, $Line, $CurrentLabel, $Label, $ValueCount, $ProcessingLabelData, %DataFields);

  %DataFields = ();
  $CurrentLabel = '';
  $ProcessingLabelData = 0;
  $ValueCount = 0;

  CMPDLINE: for $CmpdLine (@$CmpdLines) {
    # Undefined entries are handled like blank lines...
    $Line = defined($CmpdLine) ? $CmpdLine : '';

    if ($Line =~ /^\$\$\$\$/) {
      last CMPDLINE;
    }

    if ($Line =~ /^>/) {
      # Does the line contain a field name enclosed in angular brackets?
      ($Label) = $Line =~ /<.*?>/g;
      if (defined $Label) {
        $CurrentLabel = $Label;
        $CurrentLabel =~ s/(<|>)//g;
        $ProcessingLabelData = 0;
        $ValueCount = 0;

        # Test the length instead of truth so that a field named "0" is kept...
        if (length($CurrentLabel)) {
          $ProcessingLabelData = 1;
          $DataFields{$CurrentLabel} = '';
          next CMPDLINE;
        }
      }
      # A ">" line without <label> seen while a value is being collected
      # falls through and is kept as part of that value...
    }

    if (!$ProcessingLabelData) {
      # Line outside of any labeled data item. Just ignore it...
      next CMPDLINE;
    }

    if (!length($Line)) {
      # Blank line terminates value for a label...
      $CurrentLabel = '';
      $ValueCount = 0;
      $ProcessingLabelData = 0;
      next CMPDLINE;
    }

    $ValueCount++;
    if ($ValueCount > 1) {
      $DataFields{$CurrentLabel} .= "\n" . $Line;
    }
    else {
      $DataFields{$CurrentLabel} = $Line;
    }
  }
  return (%DataFields);
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
419
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
420 # Return an updated compound string after removing data header label along with its
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
421 # value from the specified compound string...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
422 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
sub RemoveCmpdDataHeaderLabelAndValue {
  # Remove a data item (header line, value lines and terminating blank line)
  # from a compound string.
  #
  # Parameters:
  #   $CmpdString      - compound record as a single newline delimited string
  #   $DataHeaderLabel - field name, without angle brackets; compared
  #                      literally and case insensitively
  # Returns: the compound string without the matching data item(s). All other
  # lines, including a "$$$$" line ending the removed value, are retained.
  my($CmpdString, $DataHeaderLabel) = @_;
  my($Line, $ProcessingDataHeaderLabel, @CmpdLines);

  @CmpdLines = ();
  $ProcessingDataHeaderLabel = 0;

  CMPDLINE: for $Line (split "\n", $CmpdString) {
    # Quote the label so that characters such as ( ) . [ ] + in field names
    # are matched literally instead of being interpreted as a pattern...
    if ($Line =~ /^>/ && $Line =~ /<\Q$DataHeaderLabel\E>/i) {
      $ProcessingDataHeaderLabel = 1;
      next CMPDLINE;
    }

    if ($ProcessingDataHeaderLabel) {
      if ($Line =~ /^\$\$\$\$/) {
        # Value ran into the record terminator without a blank line; keep it...
        push @CmpdLines, $Line;
        $ProcessingDataHeaderLabel = 0;
      }
      elsif (!length($Line)) {
        # Blank line indicates end of data value...
        $ProcessingDataHeaderLabel = 0;
      }
      next CMPDLINE;
    }

    # Track compound lines which don't belong to the removed data item...
    push @CmpdLines, $Line;
  }

  return join "\n", @CmpdLines;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
454
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
455 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
456 # Using bond blocks, figure out the number of disconnected fragments and
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
457 # return their values along with the atom numbers in a string delimited by new
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
458 # line character.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
459 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
sub GetCmpdFragments {
  # Identify disconnected fragments of a compound from its bond block.
  #
  # Parameter: reference to an array of compound lines; the counts line is
  # read from index 3, followed by the atom lines and then the bond lines.
  # Returns: ($FragmentCount, $FragmentString). The fragment string holds one
  # line per fragment, delimited by "\n", listing its atom numbers in
  # ascending order separated by a space. Fragments are ordered by decreasing
  # size; fragments of equal size are ordered by the order in which they were
  # found, i.e. by their lowest atom number, so that the result doesn't
  # depend on hash key ordering.
  #
  # Relies on ParseCmpdCountsLine to supply atom and bond counts and on
  # ParseCmpdBondLine to supply first atom number, second atom number and
  # bond type for a bond line.
  my($CmpdLines) = @_;
  my($AtomCount, $BondCount, @AtomConnections, @ProcessedAtoms, $ProcessedAtomCount, %Fragments, $FragmentNum, @FragmentStrings);

  # Setup the connection table for each atom...
  ($AtomCount, $BondCount) = ParseCmpdCountsLine($CmpdLines->[3]);
  @AtomConnections = ();
  for my $AtomNum (1 .. $AtomCount) {
    $AtomConnections[$AtomNum] = {};
  }
  for (my $LineIndex = 4 + $AtomCount; $LineIndex < (4 + $AtomCount + $BondCount); $LineIndex++) {
    my($FirstAtomNum, $SecondAtomNum, $BondType) = ParseCmpdBondLine($CmpdLines->[$LineIndex]);

    # Record the bond in both directions; first bond type seen is retained...
    $AtomConnections[$FirstAtomNum]{$SecondAtomNum} ||= $BondType;
    $AtomConnections[$SecondAtomNum]{$FirstAtomNum} ||= $BondType;
  }

  # Get set to count fragments...
  $ProcessedAtomCount = 0;
  $FragmentNum = 0;
  %Fragments = ();
  @ProcessedAtoms = ();
  for my $AtomNum (1 .. $AtomCount) {
    $ProcessedAtoms[$AtomNum] = 0;
  }

  while ($ProcessedAtomCount < $AtomCount) {
    my(@ProcessingAtoms);

    # Start a new fragment at the lowest numbered atom not yet processed...
    @ProcessingAtoms = ();
    ATOMNUM: for my $AtomNum (1 .. $AtomCount) {
      if ($ProcessedAtoms[$AtomNum]) {
        next ATOMNUM;
      }
      $ProcessedAtomCount++;
      $ProcessedAtoms[$AtomNum] = 1;
      push @ProcessingAtoms, $AtomNum;
      $FragmentNum++;
      $Fragments{$FragmentNum} = [$AtomNum];
      last ATOMNUM;
    }

    # Go over the neighbors and follow the connection trail, one shell of
    # neighbors at a time, while collecting the atom numbers present in the
    # connected fragment...
    while (@ProcessingAtoms) {
      my(@ConnectedAtoms);

      @ConnectedAtoms = ();
      for my $ProcessAtomNum (@ProcessingAtoms) {
        for my $NbrAtomNum (keys %{$AtomConnections[$ProcessAtomNum]}) {
          if (!$ProcessedAtoms[$NbrAtomNum]) {
            $ProcessedAtomCount++;
            $ProcessedAtoms[$NbrAtomNum] = 1;
            push @ConnectedAtoms, $NbrAtomNum;
            push @{$Fragments{$FragmentNum}}, $NbrAtomNum;
          }
        }
      }
      @ProcessingAtoms = @ConnectedAtoms;
    }
  }

  # Sort out the fragments by size; ties are resolved by fragment number to
  # keep the output reproducible across runs...
  @FragmentStrings = ();
  for my $SortedFragmentNum (sort { @{$Fragments{$b}} <=> @{$Fragments{$a}} || $a <=> $b } keys %Fragments) {
    # Sort the atoms in a fragment by their numbers...
    push @FragmentStrings, join(" ", sort { $a <=> $b } @{$Fragments{$SortedFragmentNum}});
  }

  return ($FragmentNum, join("\n", @FragmentStrings));
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
538
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
539 # Count the number of lines present between the 4th line and the line containing "M END"
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
sub GetCtabLinesCount {
  # Count the atom/bond data lines which follow the first four compound lines.
  #
  # Parameter: reference to an array of compound lines.
  # Returns: number of lines, starting at index 4, found before the first line
  # which starts with anything other than a digit or a space; 0 when no such
  # terminating line exists.
  my($CmpdLines) = @_;
  my $FirstCtabLineIndex = 4;

  for my $Index ($FirstCtabLineIndex .. $#{$CmpdLines}) {
    # Atom and bond data lines start with a space or a digit; anything else
    # indicates end of Ctab atom/bond data block...
    if ($CmpdLines->[$Index] !~ /^[0-9 ]/) {
      return $Index - $FirstCtabLineIndex;
    }
  }
  return 0;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
557
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
558 # Using atom blocks, count the number of atoms which contain special element
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
559 # symbols not present in the periodic table.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
sub GetUnknownAtoms {
  # Find atom lines whose atom symbol is not accepted by IsElement.
  #
  # Parameter: reference to an array of compound lines; the counts line is
  # read from index 3 and atom lines start at index 4.
  # Returns: ($UnknownAtomCount, $UnknownAtoms, $UnknownAtomLines) where
  # $UnknownAtoms lists the symbols, each one preceded by a space, and
  # $UnknownAtomLines contains the corresponding atom lines delimited by "\n".
  my($CmpdLines) = @_;
  my $UnknownAtomCount = 0;
  my $UnknownAtoms = "";
  my $UnknownAtomLines = "";

  my($AtomCount) = ParseCmpdCountsLine($CmpdLines->[3]);
  my $LineIndex = 4;

  ATOMLINE: while ($LineIndex < (4 + $AtomCount)) {
    my $AtomLine = $CmpdLines->[$LineIndex];
    $LineIndex++;

    # Only the first value returned for the atom line, the symbol, is needed...
    my($AtomSymbol) = ParseCmpdAtomLine($AtomLine);
    next ATOMLINE if IsElement($AtomSymbol);

    $UnknownAtomCount++;
    $UnknownAtoms .= " $AtomSymbol";
    $UnknownAtomLines = $UnknownAtomLines ? $UnknownAtomLines . "\n" . $AtomLine : $AtomLine;
  }
  return ($UnknownAtomCount, $UnknownAtoms, $UnknownAtomLines);
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
583
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
584 # Check z coordinates of all atoms to see whether any of them is non-zero
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
585 # which makes the compound geometry three dimensional...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
586 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
sub IsCmpd3D {
  # Check whether any atom of the compound has a non-zero z coordinate.
  #
  # Parameter: reference to an array of compound lines; the counts line is
  # read from index 3 and atom lines start at index 4.
  # Returns: 1 as soon as an atom line with a non-zero z value is found;
  # otherwise 0.
  my($CmpdLines) = @_;
  my($AtomCount) = ParseCmpdCountsLine($CmpdLines->[3]);

  my $LineIndex = 4;
  while ($LineIndex < (4 + $AtomCount)) {
    # Fourth value returned for an atom line is used as the z coordinate...
    my($Symbol, $X, $Y, $Z) = ParseCmpdAtomLine($CmpdLines->[$LineIndex]);
    return 1 if $Z != 0;
    $LineIndex++;
  }
  return 0;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
600
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
601 # Check whether it's a 2D compound...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
602 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
sub IsCmpd2D {
  # A compound is considered 2D whenever IsCmpd3D doesn't report it as 3D.
  #
  # Parameter: reference to an array of compound lines.
  # Returns: 1 for a 2D compound; otherwise 0.
  my($CmpdLines) = @_;

  if (IsCmpd3D($CmpdLines)) {
    return 0;
  }
  return 1;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
608
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
609 # Using bond blocks, count the number of bond lines which contain atom numbers
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
610 # greater than atom count specified in compound count line...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
611 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
sub GetInvalidAtomNumbers {
  # Find atom numbers, in the bond block and in the property lines, which are
  # greater than the atom count specified on the compound counts line.
  #
  # Parameter: reference to an array of compound lines; the counts line is
  # read from index 3, followed by the atom lines and then the bond lines.
  # Returns: ($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines)
  # where $InvalidAtomNumbers lists the offending numbers, each one preceded
  # by a space, and $InvalidAtomNumberLines contains the offending lines
  # delimited by "\n".
  #
  # Property lines are recognized using whitespace tolerant patterns: CTfile
  # property tags are written with two spaces after M ("M  END", "M  CHG"),
  # which a pattern containing just one literal space would never match.
  my($CmpdLines) = @_;
  my($LineIndex, $AtomCount, $BondCount, $FirstAtomNum, $SecondAtomNum, $InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines, $Line, $InvalidAtomPropertyLine, $ValuePairIndex, $AtomNum, @ValuePairs);

  ($AtomCount, $BondCount) = ParseCmpdCountsLine($CmpdLines->[3]);

  $InvalidAtomNumbersCount = 0;
  $InvalidAtomNumbers = "";
  $InvalidAtomNumberLines = "";

  # Go over bond block lines...
  LINE: for ($LineIndex = 4 + $AtomCount; $LineIndex < (4 + $AtomCount + $BondCount); $LineIndex++) {
    ($FirstAtomNum, $SecondAtomNum) = ParseCmpdBondLine($CmpdLines->[$LineIndex]);
    if ($FirstAtomNum <= $AtomCount && $SecondAtomNum <= $AtomCount) {
      next LINE;
    }
    if ($FirstAtomNum > $AtomCount) {
      $InvalidAtomNumbersCount++;
      $InvalidAtomNumbers .= " $FirstAtomNum";
    }
    if ($SecondAtomNum > $AtomCount) {
      $InvalidAtomNumbersCount++;
      $InvalidAtomNumbers .= " $SecondAtomNum";
    }
    if ($InvalidAtomNumberLines) {
      $InvalidAtomNumberLines .= "\n" . $CmpdLines->[$LineIndex];
    }
    else {
      $InvalidAtomNumberLines = $CmpdLines->[$LineIndex];
    }
  }

  # Go over property lines before M END...
  #
  LINE: for ($LineIndex = (4 + $AtomCount + $BondCount); $LineIndex < @$CmpdLines; $LineIndex++) {
    $Line = $CmpdLines->[$LineIndex];
    if ($Line =~ /^M\s+END/i) {
      last LINE;
    }

    # Collect (atom number, value) pairs from the supported property lines...
    @ValuePairs = ();
    if ($Line =~ /^M\s+CHG/i) {
      @ValuePairs = ParseCmpdChargePropertyLine($Line);
    }
    elsif ($Line =~ /^M\s+RAD/i) {
      @ValuePairs = ParseCmpdRadicalPropertyLine($Line);
    }
    elsif ($Line =~ /^M\s+ISO/i) {
      @ValuePairs = ParseCmpdIsotopePropertyLine($Line);
    }
    elsif ($Line =~ /^A /i) {
      # Atom alias spans two lines; consume the line following it as well...
      my($NextLine);
      $LineIndex++;
      $NextLine = $CmpdLines->[$LineIndex];
      @ValuePairs = ParseCmpdAtomAliasPropertyLine($Line, $NextLine);
    }
    else {
      next LINE;
    }

    # Atom numbers are located at even indices of the value pairs list...
    $InvalidAtomPropertyLine = 0;
    for ($ValuePairIndex = 0; $ValuePairIndex < $#ValuePairs; $ValuePairIndex += 2) {
      $AtomNum = $ValuePairs[$ValuePairIndex];
      if ($AtomNum > $AtomCount) {
        $InvalidAtomPropertyLine = 1;
        $InvalidAtomNumbersCount++;
        $InvalidAtomNumbers .= " $AtomNum";
      }
    }
    if ($InvalidAtomPropertyLine) {
      if ($InvalidAtomNumberLines) {
        $InvalidAtomNumberLines .= "\n" . $Line;
      }
      else {
        $InvalidAtomNumberLines = $Line;
      }
    }
  }

  return ($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines);
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
692
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
693 # Ctab lines: Atom block
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
694 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
695 # Format: xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz aaaddcccssshhhbbbvvvHHHrrriiimmmnnneee
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
696 # A10 A10 A10 xA3 A2A3 A3 A3 A3 A3 A3 A3 A3 A3 A3 A3
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
697 # x,y,z: Atom coordinates
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
698 # aaa: Atom symbol. Entry in periodic table or L for atom list, A, Q, * for unspecified
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
699 # atom, and LP for lone pair, or R# for Rgroup label
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
700 # dd: Mass difference. -3, -2, -1, 0, 1, 2, 3, 4 (0 for value beyond these limits)
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
701 # ccc: Charge. 0 = uncharged or value other than these, 1 = +3, 2 = +2, 3 = +1,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
702 # 4 = doublet radical, 5 = -1, 6 = -2, 7 = -3
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
703 # sss: Atom stereo parity. 0 = not stereo, 1 = odd, 2 = even, 3 = either or unmarked stereo center
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
704 # hhh: Hydrogen count + 1. 1 = H0, 2 = H1, 3 = H2, 4 = H3, 5 = H4
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
705 # bbb: Stereo care box. 0 = ignore stereo configuration of this double bond atom, 1 = stereo
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
706 # configuration of double bond atom must match
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
707 # vvv: Valence. 0 = no marking (default)(1 to 14) = (1 to 14) 15 = zero valence
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
708 # HHH: H0 designator. 0 = not specified, 1 = no H atoms allowed (redundant due to hhh)
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
709 # rrr: Not used
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
710 # iii: Not used
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
711 # mmm: Atom-atom mapping number. 1 - number of atoms
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
712 # nnn: Inversion/retention flag. 0 = property not applied, 1 = configuration is inverted,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
713 # 2 = configuration is retained.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
714 # eee: Exact change flag. 0 = property not applied, 1 = change on atom must be
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
715 # exactly as shown
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
716 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
717 # Notes:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
718 # . StereoParity: 1 - ClockwiseStereo, 2 - AntiClockwiseStereo; 3 - Either; 0 - none. These
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
719 # values determine chirality around the chiral center; a non zero value indicates atom
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
720 # has been marked as chiral center.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
721 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdAtomLine {
  my($Line) = @_;

  # Parse one fixed-width atom block line and return:
  #
  #   ($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity)
  #
  # Fields are extracted with the "A" unpack format, which strips trailing
  # spaces only; leading spaces in the fixed-width columns are retained.
  # Any field not present on the line is returned as an empty string.
  #
  my($AtomX, $AtomY, $AtomZ, $AtomSymbol, $MassDifference, $Charge, $StereoParity) = ('') x 7;

  if (length($Line) > 31) {
    # Coordinates, one skipped column, atom symbol, mass difference, charge and stereo parity.
    ($AtomX, $AtomY, $AtomZ, $AtomSymbol, $MassDifference, $Charge, $StereoParity) = unpack("A10A10A10xA3A2A3A3", $Line);
  }
  else {
    # The line ends before the atom symbol column: retrieve coordinates only.
    # The atom symbol is deliberately left out of the assignment so that it
    # keeps its empty string default instead of becoming undefined.
    ($AtomX, $AtomY, $AtomZ) = unpack("A10A10A10", $Line);
  }
  return ($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity);
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
735
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
736 # Map MDL charge value used in SD and MOL files to internal charge used by MayaChemTools.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
737 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
sub MDLChargeToInternalCharge {
  my($MDLCharge) = @_;

  # Translate the charge code from the atom block of SD and MOL files into
  # the signed charge value used internally.
  #
  # Each entry is: [MDL charge code, internal charge]
  my(@ChargeCodeMap) = ([0, 0], [1, 3], [2, 2], [3, 1], [5, -1], [6, -2], [7, -3]);

  foreach my $Entry (@ChargeCodeMap) {
    my($MDLCode, $InternalValue) = @{$Entry};
    if ($MDLCharge == $MDLCode) {
      return $InternalValue;
    }
  }

  # Any other code maps to 0. Code 4, "doublet radical", is a known value
  # and is mapped silently; everything else triggers a warning.
  unless ($MDLCharge == 4) {
    carp "Warning: MDLChargeToInternalCharge: MDL charge value, $MDLCharge, is not supported: An internal charge value, 0, has been assigned...";
  }
  return 0;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
759
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
760 # Map internal charge used by MayaChemTools to MDL charge value used in SD and MOL files.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
761 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
sub InternalChargeToMDLCharge {
  my($InternalCharge) = @_;

  # Translate a signed internal charge into the charge code written to the
  # atom block of SD and MOL files. Only charges from -3 to +3 have a code;
  # every other internal charge, including 0, is written as MDL code 0.
  #
  if ($InternalCharge == 3) {
    return 1;
  }
  elsif ($InternalCharge == 2) {
    return 2;
  }
  elsif ($InternalCharge == 1) {
    return 3;
  }
  elsif ($InternalCharge == -1) {
    return 5;
  }
  elsif ($InternalCharge == -2) {
    return 6;
  }
  elsif ($InternalCharge == -3) {
    return 7;
  }
  return 0;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
779
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
780 # Ctab lines: Bond block
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
781 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
782 # Format: 111222tttsssxxxrrrccc
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
783 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
784 # 111: First atom number.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
785 # 222: Second atom number.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
786 # ttt: Bond type. 1 = Single, 2 = Double, 3 = Triple, 4 = Aromatic, 5 = Single or Double,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
787 # 6 = Single or Aromatic, 7 = Double or Aromatic, 8 = Any
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
788 # sss: Bond stereo. Single bonds: 0 = not stereo, 1 = Up, 4 = Either, 6 = Down,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
789 # Double bonds: 0 = Use x-, y-, z-coords from atom block to determine cis or trans,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
790 # 3 = Cis or trans (either) double bond
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
791 # xxx: Not used
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
792 # rrr: Bond topology. 0 = Either, 1 = Ring, 2 = Chain
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
793 # ccc: Reacting center status. 0 = unmarked, 1 = a center, -1 = not a center,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
794 # Additional: 2 = no change,4 = bond made/broken, 8 = bond order changes 12 = 4+8
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
795 # (both made/broken and changes); 5 = (4 + 1), 9 = (8 + 1), and 13 = (12 + 1) are also possible
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
796 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdBondLine {
  my($Line) = @_;

  # Split the first four 3-character columns of a bond block line into
  # first atom number, second atom number, bond type and bond stereo.
  my(@BondFields) = unpack("A3A3A3A3", $Line);

  # Drop every space character from each extracted field.
  foreach my $BondField (@BondFields) {
    $BondField =~ s/ //g;
  }

  my($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo) = @BondFields;
  return ($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo);
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
804
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
805 # Map MDL bond type value used in SD and MOL files to internal bond order and bond types
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
806 # values used by MayaChemTools...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
807 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
sub MDLBondTypeToInternalBondOrder {
  my($MDLBondType) = @_;

  # Translate the bond type code from the bond block of SD and MOL files
  # into an internal (bond order, bond type) pair, returned as a list.
  #
  # MDL aromatic and query bond types (4 to 8) are meant for structure
  # queries; they are still mapped so that the bond type name is preserved.
  # Aromatic bonds use the internal bond order of 1.5.
  #
  # Each entry is: [MDL bond type, internal bond order, internal bond type]
  my(@BondTypesMap) = (
    [1, 1, 'Single'],
    [2, 2, 'Double'],
    [3, 3, 'Triple'],
    [4, 1.5, 'Aromatic'],
    [5, 1, 'SingleOrDouble'],
    [6, 1, 'SingleOrAromatic'],
    [7, 2, 'DoubleOrAromatic'],
    [8, 1, 'Any'],
  );

  foreach my $Entry (@BondTypesMap) {
    my($MDLValue, $InternalBondOrder, $InternalBondType) = @{$Entry};
    if ($MDLBondType == $MDLValue) {
      return ($InternalBondOrder, $InternalBondType);
    }
  }

  # Unknown MDL bond type: fall back to a single bond. The warning reports
  # the bond order that is actually assigned.
  carp "Warning: MDLBondTypeToInternalBondOrder: MDL bond type value, $MDLBondType, is not supported: An internal bond order value, 1, has been assigned...";

  return (1, 'Single');
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
838
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
839 # Map internal bond order and bond type values used by MayaChemTools to MDL bond type value used
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
840 # in SD and MOL files...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
841 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
sub InternalBondOrderToMDLBondType {
  my($InternalBondOrder, $InternalBondType) = @_;
  my($MDLBondType);

  # Translate an internal bond order, refined by the internal bond type
  # name, into the bond type code written to SD and MOL files.
  #
  # Bond type names are matched case insensitively. A missing bond type is
  # treated as an empty string, so only the bond order is used.
  #
  $InternalBondType = '' unless defined $InternalBondType;

  BONDTYPE: {
    if ($InternalBondOrder == 1) {
      # Bond order 1 is shared by plain single bonds and three query bond
      # types; the bond type name selects among them.
      if ($InternalBondType =~ /^SingleOrDouble$/i) {
        $MDLBondType = 5;
      }
      elsif ($InternalBondType =~ /^SingleOrAromatic$/i) {
        $MDLBondType = 6;
      }
      elsif ($InternalBondType =~ /^Any$/i) {
        $MDLBondType = 8;
      }
      else {
        $MDLBondType = 1;
      }
      last BONDTYPE;
    }
    if ($InternalBondOrder == 2) {
      if ($InternalBondType =~ /^DoubleOrAromatic$/i) {
        $MDLBondType = 7;
      }
      else {
        $MDLBondType = 2;
      }
      last BONDTYPE;
    }
    if ($InternalBondOrder == 3) { $MDLBondType = 3; last BONDTYPE;}
    if ($InternalBondOrder == 1.5) { $MDLBondType = 4; last BONDTYPE;}

    # 'Any' is honored with any other bond order.
    if ($InternalBondType =~ /^Any$/i) { $MDLBondType = 8; last BONDTYPE;}

    # No match: fall back to a single bond and warn.
    $MDLBondType = 1;

    carp "Warning: InternalBondOrderToMDLBondType: Internal bond order and type values, $InternalBondOrder and $InternalBondType, don't match any valid MDL bond type: MDL bond type value, 1, has been assigned...";
  }
  return $MDLBondType;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
882
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
883 # Third line: Comments - A blank line is also allowed.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdCommentsLine {
  my $Line = shift;

  # Take at most the first 80 characters of the comments line; the "A"
  # unpack format also strips trailing spaces.
  my($Comments) = unpack("A80", $Line);

  return $Comments;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
892
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
893 # Map MDL bond stereo value used in SD and MOL files to internal bond stereochemistry values used by MayaChemTools...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
894 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
sub MDLBondStereoToInternalBondStereochemistry {
  my($MDLBondStereo) = @_;

  # Translate the bond stereo code from the bond block of SD and MOL files
  # into the internal bond stereochemistry name. An unsupported code
  # returns an empty string and triggers a warning.
  #
  # Each entry is: [MDL bond stereo code, internal bond stereochemistry]
  my(@BondStereoMap) = (
    [1, 'Up'],
    [4, 'UpOrDown'],
    [6, 'Down'],
    [3, 'CisOrTrans'],
    [0, 'None'],
  );

  foreach my $Entry (@BondStereoMap) {
    my($MDLValue, $InternalBondStereo) = @{$Entry};
    if ($MDLBondStereo == $MDLValue) {
      return $InternalBondStereo;
    }
  }

  # The warning is prefixed with the name of this function so that its
  # origin can be located.
  carp "Warning: MDLBondStereoToInternalBondStereochemistry: MDL bond stereo value, $MDLBondStereo, is not supported: It has been ignored and bond order would be used to determine bond type...";

  return '';
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
913
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
914 # Map internal bond stereochemistry values used by MayaChemTools to MDL bond stereo value used in SD and MOL files...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
915 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
sub InternalBondStereochemistryToMDLBondStereo {
  my($InternalBondStereo) = @_;

  # Translate an internal bond stereochemistry name into the bond stereo
  # code written to SD and MOL files. All names are matched case
  # insensitively. Any other value, including a missing one, maps to 0.
  #
  return 0 unless defined $InternalBondStereo;

  return 1 if ($InternalBondStereo =~ /^Up$/i);
  return 4 if ($InternalBondStereo =~ /^UpOrDown$/i);
  return 6 if ($InternalBondStereo =~ /^Down$/i);
  return 3 if ($InternalBondStereo =~ /^CisOrTrans$/i);

  return 0;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
932
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
933 # Fourth line: Counts
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
934 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
935 # Format: aaabbblllfffcccsssxxxrrrpppiiimmmvvvvvv
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
936 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
937 # aaa: number of atoms; bbb: number of bonds; lll: number of atom lists; fff: (obsolete)
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
938 # ccc: chiral flag: 0=not chiral, 1=chiral; sss: number of stext entries; xxx,rrr,ppp,iii:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
939 # (obsolete); mmm: number of lines of additional properties, including the M END line, No
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
940 # longer supported, default is set to 999; vvvvvv: version
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
941
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdCountsLine {
  my($Line) = @_;

  # Parse the counts line and return:
  #
  #   ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version)
  #
  # Start with the values used for fields missing from a short line; longer
  # lines overwrite them with what is actually present.
  my($AtomCount, $BondCount);
  my($ChiralFlag, $PropertyCount, $Version) = ("0", "999", "v2000");

  my($LineLength) = length($Line);

  if ($LineLength < 15) {
    # Only atom and bond counts are available.
    ($AtomCount, $BondCount) = unpack("A3A3", $Line);
  }
  elsif ($LineLength < 39) {
    # Chiral flag is available as well; property count and version are not.
    ($AtomCount, $BondCount, $ChiralFlag) = unpack("A3A3x3x3A3", $Line);
  }
  else {
    # Complete counts line.
    ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version) = unpack("A3A3x3x3A3x3x3x3x3x3A3A6", $Line);
  }

  # V3000 connection tables are not handled by this module: stop right here
  # instead of leaving it to callers.
  if ($Version =~ /V3000/i) {
    croak "Error: SDFileUtil::ParseCmpdCountsLine: The Extended Connection Table (V3000) format in MDL MOL and SD files is not supported by the current release of MayaChemTools...";
  }

  return ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version);
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
967
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
968 # Second line: Misc info
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
969 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
970 # Format: IIPPPPPPPPMMDDYYHHmmddSSssssssssssEEEEEEEEEEEERRRRRR
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
971 # A2A8 A10 A2I2A10 A12 A6
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
972 # User's first and last initials (I), program name (P), date/time (M/D/Y,H:m),
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
973 # dimensional codes - 2D or 3D (d),scaling factors (S, s), energy (E) if modeling program input,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
974 # internal registry number (R) if input through MDL form. A blank line is also allowed.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdMiscInfoLine {
  my $Line = shift;

  # Fixed column widths, in order: user initials, program name, date/time,
  # dimensional code, first scaling factor, second scaling factor, energy,
  # and registry number.
  my($Template) = join('', qw(A2 A8 A10 A2 A2 A10 A12 A6));

  my($UserInitial, $ProgramName, $Date, $Code, $ScalingFactor1, $ScalingFactor2, $Energy, $RegistryNum) = unpack($Template, $Line);

  return ($UserInitial, $ProgramName, $Date, $Code, $ScalingFactor1, $ScalingFactor2, $Energy, $RegistryNum);
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
982
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
983 # First line: Molecule name. This line is unformatted, but like all other lines in a
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
984 # molfile may not extend beyond column 80. A blank line is also allowed.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdMolNameLine {
  my $Line = shift;

  # Take at most the first 80 characters of the molecule name line; the "A"
  # unpack format also strips trailing spaces.
  my($MolName) = unpack("A80", $Line);

  return $MolName;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
993
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
994 # Parse atom alias property line in CTAB generic properties block.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
995 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
996 # Atom alias property line format:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
997 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
998 # A aaa
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
999 # x...
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1000 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1001 # aaa: Atom number
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1002 # x: Atom alias in next line
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1003 #
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdAtomAliasPropertyLine {
  my($Line, $NextLine) = @_;
  my($AtomNumber, $AtomAlias);

  # Parse an atom alias property line together with the line following it
  # and return ($AtomNumber, $AtomAlias).
  #
  # The property line is split on whitespace: its first word is the label
  # and is discarded, its second word is the atom number. The alias is the
  # whole following line, used as is.
  #
  (undef, $AtomNumber) = split(' ', $Line);
  $AtomAlias = $NextLine;

  # Warn only when the alias is really missing: an alias of "0" is a value.
  if (!(defined($AtomAlias) && length($AtomAlias))) {
    carp "Warning: ParseCmpdAtomAliasPropertyLine: No atom alias value specified on the line following atom alias property line...";
  }

  return ($AtomNumber, $AtomAlias);
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1017
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Parse charge property line in CTAB generic properties block.
#
# Charge property line format:
#
# M CHGnn8 aaa vvv ...
#
# nn8: Number of value pairs. Maximum of 8 pairs allowed.
# aaa: Atom number
# vvv: -15 to +15. Default of 0 = uncharged atom. When present, this property supersedes
# all charge and radical values in the atom block, forcing a 0 charge on all atoms not
# listed in an M CHG or M RAD line.
#
# Parameters:
#   $Line - the charge property line.
#
# Returns whatever _ParseCmpdGenericPropertyLine returns for the line: a flat
# list of alternating atom number and charge values.
#
sub ParseCmpdChargePropertyLine {
  my $Line = shift;

  return _ParseCmpdGenericPropertyLine('Charge', $Line);
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1035
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1036
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Parse isotope property line in CTAB generic properties block.
#
# Isotope property line format:
#
# M ISOnn8 aaa vvv ...
#
# nn8: Number of value pairs. Maximum of 8 pairs allowed.
# aaa: Atom number
# vvv: Absolute mass of the atom isotope as a positive integer. When present, this property
# supersedes all isotope values in the atom block. Default (no entry) means natural
# abundance. The difference between this absolute mass value and the natural
# abundance value specified in the PTABLE.DAT file must be within the range of -18
# to +12
#
# Notes:
# . Values correspond to mass numbers...
#
# Parameters:
#   $Line - the isotope property line.
#
# Returns whatever _ParseCmpdGenericPropertyLine returns for the line: a flat
# list of alternating atom number and isotope values.
#
sub ParseCmpdIsotopePropertyLine {
  my $Line = shift;

  return _ParseCmpdGenericPropertyLine('Isotope', $Line);
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1059
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Parse radical property line in CTAB generic properties block.
#
# Radical property line format:
#
# M RADnn8 aaa vvv ...
#
# nn8: Number of value pairs. Maximum of 8 pairs allowed.
# aaa: Atom number
# vvv: Default of 0 = no radical, 1 = singlet, 2 = doublet, 3 = triplet. When
# present, this property supersedes all charge and radical values in the atom block,
# forcing a 0 (zero) charge and radical on all atoms not listed in an M CHG or
# M RAD line.
#
# Parameters:
#   $Line - the radical property line.
#
# Returns whatever _ParseCmpdGenericPropertyLine returns for the line: a flat
# list of alternating atom number and radical values.
#
sub ParseCmpdRadicalPropertyLine {
  my $Line = shift;

  return _ParseCmpdGenericPropertyLine('Radical', $Line);
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1078
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Map MDL radical stereo value used in SD and MOL files to internal spin multiplicity values used by MayaChemTools...
#
# Parameters:
#   $MDLRadical - radical value; it is compared numerically against 0, 1, 2 and 3.
#
# Returns 0, 1, 2 or 3 for the matching supported value. For any other value a
# warning is issued through carp and an empty string is returned.
#
sub MDLRadicalToInternalSpinMultiplicity {
  my($MDLRadical) = @_;

  # Supported values map onto themselves; first numeric match wins...
  for my $SupportedValue (0, 1, 2, 3) {
    return $SupportedValue if $MDLRadical == $SupportedValue;
  }

  carp "Warning: MDLRadicalToInternalSpinMultiplicity: MDL radical value, $MDLRadical, specifed on line M RAD is not supported...";
  return '';
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1097
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Map internal spin multiplicity values used by MayaChemTools to MDL radical stereo value used in SD and MOL files...
#
# Parameters:
#   $InternalSpinMultiplicity - spin multiplicity; it is compared numerically
#                               against 1, 2 and 3.
#
# Returns 1, 2 or 3 for the matching value and 0 for everything else.
#
sub InternalSpinMultiplicityToMDLRadical {
  my($InternalSpinMultiplicity) = @_;

  # Supported values map onto themselves; first numeric match wins...
  for my $SupportedValue (1, 2, 3) {
    return $SupportedValue if $InternalSpinMultiplicity == $SupportedValue;
  }

  # Anything else is written out as no radical...
  return 0;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1114
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Process generic CTAB property line...
#
# Parameters:
#   $PropertyName - property name; used only in the warning message.
#   $Line         - property line; it is split on whitespace into a label, a
#                   property label, a value pairs count and the values.
#
# Returns the flat list of values following the first three fields. A warning
# is issued through carp when half the number of those values differs from the
# count specified on the line; the values are returned regardless.
#
sub _ParseCmpdGenericPropertyLine {
  my($PropertyName, $Line) = @_;

  # Peel the three leading fields off; whatever remains are the values...
  my @ValuePairs = split(' ', $Line);
  my($Label, $PropertyLabel, $ValuesCount) = splice(@ValuePairs, 0, 3);

  my $ValuePairsCount = (scalar @ValuePairs)/2;
  if ($ValuesCount != $ValuePairsCount) {
    carp "Warning: _ParseCmpdGenericPropertyLine: Number of atom number and $PropertyName value paris specified on $Label $PropertyLabel property line, $ValuePairsCount, does not match expected value of $ValuesCount...";
  }

  return (@ValuePairs);
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1130
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Generic CTAB property lines for charge, isotope and radical properties...
#
# Parameters:
#   $PropertyName          - 'Charge', 'Isotope' or 'Radical' (case insensitive).
#   $PropertyValuePairsRef - reference to a flat array of alternating atom
#                            number and property value entries; a trailing
#                            unpaired entry is ignored.
#
# Returns a list of property line strings, each holding at most 8 value pairs
# preceded by the property label and the number of pairs on that line. An
# empty list is returned for an unknown property name (after a carp warning)
# or when no pairs are supplied.
#
# NOTE(review): label and count spacing is kept exactly as found in this copy
# of the file; confirm it against the column layout of the CTfile properties
# block.
#
sub _GenerateCmpdGenericPropertyLines {
  my($PropertyName, $PropertyValuePairsRef) = @_;
  my($Index, $PropertyLabel, $Line, $PropertyCount, $MaxPropertyCount, $AtomNum, $PropertyValue, @PropertyLines);

  @PropertyLines = ();
  NAME: {
    if ($PropertyName =~ /^Charge$/i) { $PropertyLabel = "M CHG"; last NAME; }
    if ($PropertyName =~ /^Isotope$/i) { $PropertyLabel = "M ISO"; last NAME; }
    if ($PropertyName =~ /^Radical$/i) { $PropertyLabel = "M RAD"; last NAME; }
    carp "Warning: _GenerateCmpdGenericPropertyLines: Unknown property name, $PropertyName, specified...";
    return @PropertyLines;
  }

  # A maximum of 8 property pair values allowed per line...
  $MaxPropertyCount = 8;
  $PropertyCount = 0;
  $Line = '';
  for ($Index = 0; $Index < $#{$PropertyValuePairsRef}; $Index += 2) {
    # Flush as soon as the current line is full, before adding the next pair;
    # this keeps the count written on the line in step with its contents...
    if ($PropertyCount >= $MaxPropertyCount) {
      $Line = "${PropertyLabel} ${PropertyCount}${Line}";
      push @PropertyLines, $Line;

      $PropertyCount = 0;
      $Line = '';
    }
    $PropertyCount++;
    $AtomNum = $PropertyValuePairsRef->[$Index];
    $PropertyValue = $PropertyValuePairsRef->[$Index + 1];
    $Line .= sprintf " %3i %3i", $AtomNum, $PropertyValue;
  }

  # Write out any partially filled last line...
  if ($Line) {
    $Line = "${PropertyLabel} ${PropertyCount}${Line}";
    push @PropertyLines, $Line;
  }
  return @PropertyLines;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1169
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
#
# Read compound data into a string and return its value
#
# Parameters:
#   $SDFileRef - file handle to read lines from.
#
# Lines are read until a line starting with $$$$ is found or the input is
# exhausted. Each line read has \r\n and \r sequences changed to \n. The $$$$
# line is appended without its trailing new line. Returns the accumulated
# string; it is empty when nothing could be read.
#
# The line being read is held in a lexical variable, so the caller's $_ is
# left untouched.
#
sub ReadCmpdString {
  my($SDFileRef) = @_;
  my($CmpdString, $Line);

  $CmpdString = "";
  LINE: while (defined($Line = <$SDFileRef>)) {
    # Change Windows and Mac new line char to UNIX...
    $Line =~ s/(\r\n)|(\r)/\n/g;

    if ($Line =~ /^\$\$\$\$/) {
      # Take out any new line char at the end by explicitly removing it instead of using
      # chomp, which might not always work correctly on files generated on a system
      # with a value of input line separator different from the current system...
      $Line =~ s/\n$//g;

      # Doesn't hurt to chomp...
      chomp $Line;

      $CmpdString .= $Line;
      last LINE;
    }
    $CmpdString .= $Line;
  }
  return $CmpdString;
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1199
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
# Find out the number of fragments in the compound. And for a compound with
# more than one fragment, remove all the others besides the largest one.
#
# Parameters:
#   $CmpdLines - reference to an array of compound lines: three header lines,
#                the counts line, atom lines, bond lines, property lines and
#                any data lines after M END.
#
# Returns the list ($FragmentCount, $Fragments, $WashedCmpdString). The first
# two values come from GetCmpdFragments. The washed string stays empty unless
# the fragment count is greater than one; otherwise it holds the new compound
# lines joined by new line chars.
#
# NOTE(review): the first line of $Fragments is taken to be the largest
# fragment with its atom numbers sorted in ascending order; confirm against
# GetCmpdFragments.
#
sub WashCmpd {
  my($CmpdLines) = @_;
  my($WashedCmpdString, $FragmentCount, $Fragments);

  $WashedCmpdString = "";
  ($FragmentCount, $Fragments) = GetCmpdFragments($CmpdLines);

  # Nothing to wash for compounds without multiple fragments...
  if (!($FragmentCount > 1)) {
    return ($FragmentCount, $Fragments, $WashedCmpdString);
  }

  my @AllFragments = split "\n", $Fragments;
  my @LargestFragment = split " ", $AllFragments[0];

  # Map old atom numbers to new atom numbers: position in the fragment atom
  # list, counted from one, is the new atom number...
  my %LargestFragmentAtoms = ();
  @LargestFragmentAtoms{@LargestFragment} = (1 .. scalar(@LargestFragment));

  # Start with header and counts lines; counts line is regenerated at the end...
  my @WashedCmpdLines = @{$CmpdLines}[0 .. 3];
  my($AtomCount, $BondCount, $ChiralFlag) = ParseCmpdCountsLine(@{$CmpdLines}[3]);
  my $NewAtomCount = @LargestFragment;
  my $NewBondCount = 0;
  my $LineIndex;

  # Retrieve the largest fragment atom lines; atom lines start at index 4, so
  # the old atom number is the line index less three...
  for ($LineIndex = 4; $LineIndex < (4 + $AtomCount); $LineIndex++) {
    next unless $LargestFragmentAtoms{$LineIndex - 3};
    push @WashedCmpdLines, $CmpdLines->[$LineIndex];
  }

  # Retrieve the largest fragment bond lines...
  for ($LineIndex = 4 + $AtomCount; $LineIndex < (4 + $AtomCount + $BondCount); $LineIndex++) {
    my($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo) = ParseCmpdBondLine(@{$CmpdLines}[$LineIndex]);
    next unless ($LargestFragmentAtoms{$FirstAtomNum} && $LargestFragmentAtoms{$SecondAtomNum});

    # Set up bond line with new atom number mapping...
    $NewBondCount++;
    my $BondLine = GenerateCmpdBondLine($LargestFragmentAtoms{$FirstAtomNum}, $LargestFragmentAtoms{$SecondAtomNum}, $BondType, $BondStereo);
    push @WashedCmpdLines, $BondLine;
  }

  # Get property lines for CHG, ISO and RAD labels along with atom alias lines
  # and map the old atom numbers to new atom numbers. Other property lines
  # before the M END line are skipped as their atom numbers might not be valid
  # anymore...
  #
  my $MENDLineIndex = $LineIndex;
  LINE: for ($LineIndex = (4 + $AtomCount + $BondCount); $LineIndex < @$CmpdLines; $LineIndex++) {
    my $Line = $CmpdLines->[$LineIndex];
    if ($Line =~ /^M END/i) {
      push @WashedCmpdLines, "M END";
      $MENDLineIndex = $LineIndex;
      last LINE;
    }

    # Parse the line and remember which generator writes it back out...
    my(@ValuePairs, $GeneratePropertyLines);
    if ($Line =~ /^M CHG/i) {
      @ValuePairs = ParseCmpdChargePropertyLine($Line);
      $GeneratePropertyLines = \&GenerateCmpdChargePropertyLines;
    }
    elsif ($Line =~ /^M RAD/i) {
      @ValuePairs = ParseCmpdRadicalPropertyLine($Line);
      $GeneratePropertyLines = \&GenerateCmpdRadicalPropertyLines;
    }
    elsif ($Line =~ /^M ISO/i) {
      @ValuePairs = ParseCmpdIsotopePropertyLine($Line);
      $GeneratePropertyLines = \&GenerateCmpdIsotopePropertyLines;
    }
    elsif ($Line =~ /^A /i) {
      # Atom alias value is on the next line, which is consumed here as well...
      $LineIndex++;
      my $NextLine = $CmpdLines->[$LineIndex];
      @ValuePairs = ParseCmpdAtomAliasPropertyLine($Line, $NextLine);
      $GeneratePropertyLines = \&GenerateCmpdAtomAliasPropertyLines;
    }
    else {
      next LINE;
    }
    next LINE unless @ValuePairs;

    # Collect values for valid atom numbers with mapping to new atom numbers...
    my @NewValuePairs = ();
    VALUEINDEX: for (my $ValuePairIndex = 0; $ValuePairIndex < $#ValuePairs; $ValuePairIndex += 2) {
      my($OldAtomNum, $Value) = @ValuePairs[$ValuePairIndex, $ValuePairIndex + 1];
      next VALUEINDEX unless exists $LargestFragmentAtoms{$OldAtomNum};
      push @NewValuePairs, ($LargestFragmentAtoms{$OldAtomNum}, $Value);
    }
    next LINE unless @NewValuePairs;

    push @WashedCmpdLines, $GeneratePropertyLines->(\@NewValuePairs);
  }

  # Retrieve rest of the data label and value property data...
  push @WashedCmpdLines, @{$CmpdLines}[(1 + $MENDLineIndex) .. $#{$CmpdLines}];

  # Update atom and bond count line...
  $WashedCmpdLines[3] = GenerateCmpdCountsLine($NewAtomCount, $NewBondCount, $ChiralFlag);

  $WashedCmpdString = join "\n", @WashedCmpdLines;

  return ($FragmentCount, $Fragments, $WashedCmpdString);
}
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1324
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1325 1;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1326
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1327 __END__
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1328
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1329 =head1 NAME
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1330
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1331 SDFileUtil
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1332
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1333 =head1 SYNOPSIS
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1334
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1335 use SDFileUtil ;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1336
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1337 use SDFileUtil qw(:all);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1338
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1339 =head1 DESCRIPTION
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1340
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1341 B<SDFileUtil> module provides the following functions:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1342
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1343 GenerateCmpdAtomAliasPropertyLines, GenerateCmpdAtomLine, GenerateCmpdBondLine,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1344 GenerateCmpdChargePropertyLines, GenerateCmpdCommentsLine, GenerateCmpdCountsLine,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1345 GenerateCmpdDataHeaderLabelsAndValuesLines, GenerateCmpdIsotopePropertyLines,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1346 GenerateCmpdMiscInfoLine, GenerateCmpdMolNameLine,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1347 GenerateCmpdRadicalPropertyLines, GenerateEmptyCtabBlockLines,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1348 GenerateMiscLineDateStamp, GetAllAndCommonCmpdDataHeaderLabels,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1349 GetCmpdDataHeaderLabels, GetCmpdDataHeaderLabelsAndValues, GetCmpdFragments,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1350 GetCtabLinesCount, GetInvalidAtomNumbers, GetUnknownAtoms,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1351 InternalBondOrderToMDLBondType, InternalBondStereochemistryToMDLBondStereo,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1352 InternalChargeToMDLCharge, InternalSpinMultiplicityToMDLRadical, IsCmpd2D,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1353 IsCmpd3D, MDLBondStereoToInternalBondStereochemistry,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1354 MDLBondTypeToInternalBondOrder, MDLChargeToInternalCharge,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1355 MDLRadicalToInternalSpinMultiplicity, ParseCmpdAtomAliasPropertyLine,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1356 ParseCmpdAtomLine, ParseCmpdBondLine, ParseCmpdChargePropertyLine,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1357 ParseCmpdCommentsLine, ParseCmpdCountsLine, ParseCmpdIsotopePropertyLine,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1358 ParseCmpdMiscInfoLine, ParseCmpdMolNameLine, ParseCmpdRadicalPropertyLine,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1359 ReadCmpdString, RemoveCmpdDataHeaderLabelAndValue, WashCmpd
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1360
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1361 =head1 METHODS
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1362
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1363 =over 4
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1364
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1365 =item B<GenerateCmpdAtomAliasPropertyLines>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1366
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1367 @Lines = GenerateCmpdAtomAliasPropertyLines($AliasValuePairsRef);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1368
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1369 Returns formatted atom alias property lines corresponding to successive pairs
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1370 of atom number and alias values specified by a reference to an array. Two lines
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1371 are generated for each atom number and alias value pair: First line - A <AtomNum>;
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1372 Second line:<AtomAlias>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1373
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1374 =item B<GenerateCmpdAtomLine>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1375
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1376 $Line = GenerateCmpdAtomLine($AtomSymbol, $AtomX, $AtomY,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1377 $AtomZ, [$MassDifference, $Charge, $StereoParity]);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1378
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1379 Returns a formatted atom data line containing all the input values.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1380
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1381 =item B<GenerateCmpdBondLine>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1382
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1383 $Line = GenerateCmpdBondLine($FirstAtomNum, $SecondAtomNum,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1384 $BondType, [$BondStereo]);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1385
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1386 Returns a formatted bond data line containing all the input values.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1387
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1388 =item B<GenerateCmpdChargePropertyLines>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1389
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1390 @Lines = GenerateCmpdChargePropertyLines($ChargeValuePairsRef);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1391
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1392 Returns formatted M CHG property lines corresponding to successive pairs of
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1393 atom number and charge values specified by a reference to an array.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1394
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1395 =item B<GenerateCmpdCommentsLine>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1396
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1397 $Line = GenerateCmpdCommentsLine($Comments);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1398
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1399 Returns a formatted comments data line.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1400
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1401 =item B<GenerateCmpdCountsLine>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1402
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1403 $Line = GenerateCmpdCountsLine($AtomCount, $BondCount,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1404 $ChiralFlag, [$PropertyCount, $Version]);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1405
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1406 Returns a formatted line containing all the input values. The default values of 999
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1407 and V2000 are used for I<PropertyCount> and I<Version>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1408
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1409 =item B<GenerateCmpdDataHeaderLabelsAndValuesLines>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1410
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1411 @Lines = GenerateCmpdDataHeaderLabelsAndValuesLines(
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1412 $DataHeaderLabelsRef, $DataHeaderLabelsAndValuesRef,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1413 [$SortDataLabels]);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1414
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1415 Returns formatted data lines containing header label and values lines corresponding to
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1416 all data header labels in array reference I<DataHeaderLabelsRef> with values in hash
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1417 reference I<DataHeaderLabelsAndValuesRef>. By default, data header labels are
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1418 not sorted and correspond to the label order in array reference I<DataHeaderLabelsRef>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1419
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1420 =item B<GenerateCmpdIsotopePropertyLines>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1421
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1422 @Lines = GenerateCmpdIsotopePropertyLines($IsotopeValuePairsRef);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1423
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1424 Returns formatted M ISO property lines corresponding to successive pairs of
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1425 atom number and isotope values specified by a reference to an array.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1426
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1427 =item B<GenerateCmpdMiscInfoLine>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1428
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1429 $Line = GenerateCmpdMiscInfoLine([$ProgramName, $UserInitial,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1430 $Code]);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1431
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1432 Returns a formatted line containing specified user initial, program name, date and code.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1433 Default values are: I<ProgramName - MayaChem; UserInitial - NULL; Code - 2D>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1434
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1435 =item B<GenerateCmpdMolNameLine>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1436
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1437 $Line = GenerateCmpdMolNameLine($MolName);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1438
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1439 Returns a formatted molecule name data line.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1440
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1441 =item B<GenerateCmpdRadicalPropertyLines>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1442
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1443 @Lines = GenerateCmpdRadicalPropertyLines($RadicalValuePairsRef);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1444
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1445 Returns formatted M RAD property lines corresponding to successive pairs of
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1446 atom number and multiplicity values specified by a reference to an array.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1447
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1448 =item B<GenerateEmptyCtabBlockLines>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1449
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1450 $Lines = GenerateEmptyCtabBlockLines([$Date]);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1451
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1452 Returns formatted lines representing empty CTAB block.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1453
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1454 =item B<GenerateMiscLineDateStamp>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1455
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1456 $Line = GenerateMiscLineDateStamp();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1457
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1458 Returns date stamp for misc line.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1459
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1460 =item B<GetAllAndCommonCmpdDataHeaderLabels>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1461
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1462 ($CmpdCount, $DataFieldLabelsArrayRef,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1463 $CommonDataFieldLabelsArrayRef) =
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1464 GetAllAndCommonCmpdDataHeaderLabels(\*SDFILE);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1465
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1466 Returns number of compounds, a reference to an array containing all unique data header
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1467 labels and a reference to an array containing common data field labels for all compounds
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1468 in SD file.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1469
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1470 =item B<GetCmpdDataHeaderLabels>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1471
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1472 (@Labels) = GetCmpdDataHeaderLabels(\@CmpdLines);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1473
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1474 Returns an array containing data header labels for a compound.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1475
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1476 =item B<GetCmpdDataHeaderLabelsAndValues>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1477
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1478 (%DataValues) = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1479
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1480 Returns a hash containing data header labels and values for a compound.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1481
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1482 =item B<GetCmpdFragments>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1483
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1484 ($FragmentCount, $FragmentString) = GetCmpdFragments(\@CmpdLines);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1485
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1486 Figures out the number of disconnected fragments and returns their values along
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1487 with the atom numbers in a string delimited by new line character. Fragment data
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1488 in B<FragmentString> is sorted based on its size.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1489
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1490 =item B<GetCtabLinesCount>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1491
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1492 $CtabLinesCount = GetCtabLinesCount(\@CmpdLines);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1493
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1494 Returns number of lines present between the 4th line and the line containing "M END".
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1495
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1496 =item B<GetInvalidAtomNumbers>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1497
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1498 ($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines) =
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1499 GetInvalidAtomNumbers(\@CmpdLines);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1500
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1501 Returns a list of values containing information about invalid atom numbers present
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1502 in block or atom property lines.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1503
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1504 =item B<GetUnknownAtoms>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1505
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1506 ($UnknownAtomCount, $UnknownAtoms, $UnknownAtomLines) =
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1507 GetUnknownAtoms(\@CmpdLines);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1508
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1509 Returns a list of values containing information about atoms which contain special element
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1510 symbols not present in the periodic table.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1511
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1512 =item B<InternalBondOrderToMDLBondType>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1513
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1514 $MDLBondType = InternalBondOrderToMDLBondType($InternalBondOrder);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1515
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1516 Returns value of I<MDLBondType> corresponding to I<InternalBondOrder>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1517
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1518 InternalBondOrder MDLBondType
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1519
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1520 1 1
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1521 2 2
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1522 3 3
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1523 1.5 4
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1524
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1525 =item B<InternalBondStereochemistryToMDLBondStereo>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1526
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1527 $MDLBondStereo = InternalBondStereochemistryToMDLBondStereo(
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1528 $InternalBondStereo);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1529
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1530 Returns value of I<MDLBondStereo> corresponding to I<InternalBondStereo> using following
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1531 mapping:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1532
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1533 InternalBondStereo MDLBondStereo
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1534
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1535 Up 1
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1536 UpOrDown 4
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1537 Down 6
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1538 CisOrTrans 3
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1539 Other 0
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1540
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1541 =item B<InternalChargeToMDLCharge>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1542
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1543 $MDLCharge = InternalChargeToMDLCharge($InternalCharge);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1544
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1545 Returns value of I<MDLCharge> corresponding to I<InternalCharge> using following
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1546 mapping:
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1547
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1548 InternalCharge MDLCharge
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1549
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1550 3 1
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1551 2 2
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1552 1 3
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1553 -1 5
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1554 -2 6
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1555 -3 7
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1556
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1557 =item B<InternalSpinMultiplicityToMDLRadical>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1558
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1559 $MDLRadical = InternalSpinMultiplicityToMDLRadical(
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1560 $InternalSpinMultiplicity);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1561
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1562 Returns value of I<MDLRadical> corresponding to I<InternalSpinMultiplicity>. These
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1563 values are equivalent.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1564
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1565 =item B<MDLBondStereoToInternalBondType>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1566
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1567 $InternalBondType = MDLBondStereoToInternalBondType($MDLBondStereo);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1568
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1569 Returns value of I<InternalBondType> corresponding to I<MDLBondStereo> using
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1570 mapping shown for B<InternalBondTypeToMDLBondStereo> function.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1571
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1572 =item B<IsCmpd2D>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1573
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1574 $Status = IsCmpd2D();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1575
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1576 Returns 1 or 0 based on whether z-coordinates of all atoms are zero.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1577
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1578 =item B<IsCmpd3D>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1579
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1580 $Status = IsCmpd3D();
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1581
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1582 Returns 1 or 0 based on whether z-coordinate of any atom is non-zero.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1583
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1584 =item B<MDLBondStereoToInternalBondStereochemistry>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1585
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1586 $InternalBondStereo = MDLBondStereoToInternalBondStereochemistry(
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1587 $MDLBondStereo);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1588
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1589 Returns value of I<InternalBondStereo> corresponding to I<MDLBondStereo> using
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1590 mapping shown for B<InternalBondStereochemistryToMDLBondStereo> function.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1591
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1592 =item B<MDLBondTypeToInternalBondOrder>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1593
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1594 $InternalBondOrder = MDLBondTypeToInternalBondOrder($MDLBondType);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1595
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1596 Returns value of I<InternalBondOrder> corresponding to I<MDLBondType> using
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1597 mapping shown for B<InternalBondOrderToMDLBondType> function.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1598
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1599 =item B<MDLChargeToInternalCharge>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1600
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1601 $InternalCharge = MDLChargeToInternalCharge($MDLCharge);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1602
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1603 Returns value of I<InternalCharge> corresponding to I<MDLCharge> using
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1604 mapping shown for B<InternalChargeToMDLCharge> function.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1605
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1606 =item B<MDLRadicalToInternalSpinMultiplicity>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1607
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1608 $InternalSpinMultiplicity = MDLRadicalToInternalSpinMultiplicity(
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1609 $MDLRadical);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1610
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1611 Returns value of I<InternalSpinMultiplicity> corresponding to I<MDLRadical>. These
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1612 values are equivalent.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1613
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1614 =item B<ParseCmpdAtomAliasPropertyLine>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1615
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1616 @AtomNumAndValuePairs = ParseCmpdAtomAliasPropertyLine(
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1617 $CurrentLine, $NextLine);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1618
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1619 Parses atom alias property lines in CTAB generic properties block and returns an array
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1620 with successive pairs of values corresponding to atom number and its alias.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1621
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1622 =item B<ParseCmpdAtomLine>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1623
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1624 ($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1625 $StereoParity) = ParseCmpdAtomLine($AtomDataLine);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1626
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1627 Parses compound data line containing atom information and returns a list
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1628 of values.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1629
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1630 =item B<ParseCmpdBondLine>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1631
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1632 ($FirstAtomNum, $SecondAtomNum, $BondType) =
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1633 ParseCmpdBondLine($BondDataLine);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1634
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1635 Parses compound data line containing bond information and returns a list of
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1636 values.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1637
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1638 =item B<ParseCmpdCommentsLine>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1639
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1640 $Comments = ParseCmpdCommentsLine($CommentsDataLine);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1641
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1642 Returns the comment string.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1643
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1644 =item B<ParseCmpdChargePropertyLine>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1645
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1646 @AtomNumAndValuePairs = ParseCmpdChargePropertyLine(
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1647 $ChargeDataLine);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1648
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1649 Parses charge property line in CTAB generic properties block and returns an array
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1650 with successive pairs of values corresponding to atom number and its charge.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1651
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1652 =item B<ParseCmpdCountsLine>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1653
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1654 ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version) =
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1655 ParseCmpdCountsLine(\@CountDataLines);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1656
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1657 Returns a list of values containing count information.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1658
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1659 =item B<ParseCmpdMiscInfoLine>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1660
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1661 ($UserInitial, $ProgramName, $Date, $Code, $ScalingFactor1, $ScalingFactor2,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1662 $Energy, $RegistryNum) = ParseCmpdMiscInfoLine($Line);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1663
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1664 Returns a list of values containing miscellaneous information.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1665
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1666 =item B<ParseCmpdIsotopePropertyLine>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1667
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1668 @AtomNumAndValuePairs = ParseCmpdIsotopePropertyLine(
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1669 $IsotopeDataLine);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1670
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1671 Parses isotopic property line in CTAB generic properties block and returns an array
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1672 with successive pairs of values corresponding to atom number and absolute mass of
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1673 atom isotope.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1674
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1675 =item B<ParseCmpdMolNameLine>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1676
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1677 $MolName = ParseCmpdMolNameLine($Line);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1678
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1679 Returns a string containing molecule name.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1680
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1681 =item B<ParseCmpdRadicalPropertyLine>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1682
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1683 @AtomNumAndValuePairs = ParseCmpdRadicalPropertyLine(
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1684 $RadicalDataLine);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1685
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1686 Parses radical property line in CTAB generic properties block and returns an array
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1687 with successive pairs of values corresponding to atom number and radical number
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1688 value.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1689
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1690 =item B<RemoveCmpdDataHeaderLabelAndValue>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1691
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1692 $NewCmpdString = RemoveCmpdDataHeaderLabelAndValue($CmpdString,
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1693 $DataHeaderLabel);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1694
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1695 Returns a B<NewCmpdString> after removing I<DataHeaderLabel> along with its
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1696 value from I<CmpdString>.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1697
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1698 =item B<ReadCmpdString>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1699
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1700 $CmpdString = ReadCmpdString(\*SDFILEHANDLE);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1701
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1702 Returns a string containing all the data lines for the next available compound
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1703 in an already open file indicated by SDFILEHANDLE. A NULL string is returned
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1704 on EOF.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1705
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1706 =item B<WashCmpd>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1707
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1708 ($FragmentCount, $Fragments, $WashedCmpdString) =
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1709 WashCmpd(\@CmpdLines);
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1710
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1711 Figures out the number of disconnected fragments and returns their values along
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1712 with the atom numbers in a string delimited by new line character. Fragment data
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1713 in B<FragmentString> is sorted based on its size.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1714
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1715 =back
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1716
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1717 =head1 AUTHOR
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1718
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1719 Manish Sud <msud@san.rr.com>
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1720
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1721 =head1 SEE ALSO
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1722
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1723 TextUtil.pm
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1724
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1725 =head1 COPYRIGHT
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1726
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1727 Copyright (C) 2015 Manish Sud. All rights reserved.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1728
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1729 This file is part of MayaChemTools.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1730
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1731 MayaChemTools is free software; you can redistribute it and/or modify it under
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1732 the terms of the GNU Lesser General Public License as published by the Free
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1733 Software Foundation; either version 3 of the License, or (at your option)
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1734 any later version.
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1735
4816e4a8ae95 Uploaded
deepakjadmin
parents:
diff changeset
1736 =cut