annotate lib/SDFileUtil.pm @ 3:90ea638ce878 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:11:59 -0500
parents 2abf0d43254d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1 package SDFileUtil;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
2 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
3 # $RCSfile: SDFileUtil.pm,v $
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
4 # $Date: 2015/02/28 20:47:18 $
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
5 # $Revision: 1.49 $
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
6 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
7 # Author: Manish Sud <msud@san.rr.com>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
8 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
10 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
11 # This file is part of MayaChemTools.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
12 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
14 # the terms of the GNU Lesser General Public License as published by the Free
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
15 # Software Foundation; either version 3 of the License, or (at your option) any
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
16 # later version.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
17 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
18 # MayaChemTools is distributed in the hope that it will be useful, but without
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
19 # any warranty; without even the implied warranty of merchantability or fitness
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
20 # for a particular purpose. See the GNU Lesser General Public License for more
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
21 # details.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
22 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
23 # You should have received a copy of the GNU Lesser General Public License
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
26 # Boston, MA, 02111-1307, USA.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
27 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
28
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
29 use strict;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
30 use Exporter;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
31 use Carp;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
32 use PeriodicTable qw(IsElement);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
33 use TimeUtil ();
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
34
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Package variables consulted by Exporter.
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(Exporter);

# Every public function is exported by default; the order is unchanged.
@EXPORT = qw(
  GenerateCmpdAtomLine
  GenerateCmpdBondLine
  GenerateCmpdChargePropertyLines
  GenerateCmpdCommentsLine
  GenerateCmpdCountsLine
  GenerateCmpdAtomAliasPropertyLines
  GenerateCmpdIsotopePropertyLines
  GenerateCmpdDataHeaderLabelsAndValuesLines
  GenerateCmpdMiscInfoLine
  GenerateCmpdRadicalPropertyLines
  GenerateCmpdMolNameLine
  GenerateEmptyCtabBlockLines
  GenerateMiscLineDateStamp
  GetAllAndCommonCmpdDataHeaderLabels
  GetCmpdDataHeaderLabels
  GetCmpdDataHeaderLabelsAndValues
  GetCmpdFragments
  GetCtabLinesCount
  GetUnknownAtoms
  GetInvalidAtomNumbers
  MDLChargeToInternalCharge
  InternalChargeToMDLCharge
  MDLBondTypeToInternalBondOrder
  InternalBondOrderToMDLBondType
  MDLBondStereoToInternalBondStereochemistry
  InternalBondStereochemistryToMDLBondStereo
  InternalSpinMultiplicityToMDLRadical
  MDLRadicalToInternalSpinMultiplicity
  IsCmpd3D
  IsCmpd2D
  ParseCmpdAtomLine
  ParseCmpdBondLine
  ParseCmpdCommentsLine
  ParseCmpdCountsLine
  ParseCmpdMiscInfoLine
  ParseCmpdMolNameLine
  ParseCmpdAtomAliasPropertyLine
  ParseCmpdChargePropertyLine
  ParseCmpdIsotopePropertyLine
  ParseCmpdRadicalPropertyLine
  ReadCmpdString
  RemoveCmpdDataHeaderLabelAndValue
  WashCmpd
);

# Nothing is exported on request only; the "all" tag mirrors the default list.
@EXPORT_OK = ();
%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
41
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Format the counts line of a V2000 connection table.
#
# Call forms:
#   GenerateCmpdCountsLine($AtomCount, $BondCount)
#   GenerateCmpdCountsLine($AtomCount, $BondCount, $ChiralFlag)
#   GenerateCmpdCountsLine($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version)
#
# With any argument count other than five, the property count defaults to 999
# and the version to "V2000"; the chiral flag is honored only when exactly
# three arguments are supplied and is 0 otherwise.
#
# Returns the formatted line: eleven 3-column integer fields followed by a
# 6-column version field.
#
# Croaks when the atom count or the bond count exceeds 999, because either
# value would overflow its 3-column field and shift every following column.
#
sub GenerateCmpdCountsLine {
  my($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version);

  if (@_ == 5) {
    ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version) = @_;
  }
  else {
    ($AtomCount, $BondCount) = @_;
    $ChiralFlag = (@_ == 3) ? $_[2] : 0;
    $PropertyCount = 999;
    $Version = "V2000";
  }

  if ($AtomCount > 999) {
    croak "Error: SDFileUtil::GenerateCmpdCountsLine: The atom count, $AtomCount, exceeds maximum of 999 allowed for CTAB version 2000. The Extended Connection Table (V3000) format in MDL MOL and SD files is not supported by the current release of MayaChemTools...";
  }
  # The bond count lives in a 3-column field as well and needs the same guard.
  if ($BondCount > 999) {
    croak "Error: SDFileUtil::GenerateCmpdCountsLine: The bond count, $BondCount, exceeds maximum of 999 allowed for CTAB version 2000. The Extended Connection Table (V3000) format in MDL MOL and SD files is not supported by the current release of MayaChemTools...";
  }

  return sprintf "%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i%6s", $AtomCount, $BondCount, 0, 0, $ChiralFlag, 0, 0, 0, 0, 0, $PropertyCount, $Version;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
67
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Build the comments line of a compound record.
#
# Returns $Comments unchanged when it is at most 80 characters long;
# otherwise returns only its first 80 characters.
#
sub GenerateCmpdCommentsLine {
  my($Comments) = @_;

  if (length($Comments) > 80) {
    return substr($Comments, 0, 80);
  }

  return $Comments;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
77
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Build the molecule name line of a compound record.
#
# Names longer than 80 characters are cut down to their first 80 characters;
# anything shorter is returned as is.
#
sub GenerateCmpdMolNameLine {
  my($MolName) = @_;

  return $MolName unless (length($MolName) > 80);

  return substr($MolName, 0, 80);
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
87
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Generate the misc info line (second header line) of a compound record.
#
# Parameters (all optional; an undefined or false value selects the default):
#   $ProgramName - program name, default "MayaChem"; occupies 8 columns
#   $UserInitial - user initials, default two blanks; occupies 2 columns
#   $Code        - dimensional code, default "2D"; occupies 2 columns
#
# Each value is truncated or blank-padded on the right to its column width,
# so that the date stamp and the code always start in the same column.
# The date stamp comes from GenerateMiscLineDateStamp() and is inserted as
# returned.
#
# Returns the line laid out as: initials, program name, date stamp, code.
#
sub GenerateCmpdMiscInfoLine {
  my($ProgramName, $UserInitial, $Code) = @_;

  if (!(defined($ProgramName) && $ProgramName)) {
    $ProgramName = "MayaChem";
  }
  if (!(defined($UserInitial) && $UserInitial)) {
    $UserInitial = "  ";
  }
  if (!(defined($Code) && $Code)) {
    $Code = "2D";
  }

  my $Date = GenerateMiscLineDateStamp();

  # "%-W.Ws" left-justifies, pads to W columns and truncates to W columns.
  return sprintf "%-2.2s%-8.8s%s%-2.2s", $UserInitial, $ProgramName, $Date, $Code;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
118
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Generate the lines of an empty connection table block (no atoms, no bonds).
#
# Parameters:
#   $Date - optional date stamp for the misc info line; when the sub is not
#           called with exactly one argument, GenerateMiscLineDateStamp()
#           supplies it.
#
# Returns a single string holding five newline-separated lines, without a
# trailing newline:
#   1. blank molecule name line
#   2. misc info line: two blank user initials, "MayaChem", date stamp, "2D"
#   3. blank comments line
#   4. counts line for zero atoms and zero bonds
#   5. "M  END" terminator (two blanks after the "M")
#
sub GenerateEmptyCtabBlockLines {
  my($Date);

  if (@_ == 1) {
    ($Date) = @_;
  }
  else {
    $Date = GenerateMiscLineDateStamp();
  }

  return join("\n",
    "",
    "  MayaChem${Date}2D",
    "",
    GenerateCmpdCountsLine(0, 0, 0),
    "M  END");
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
141
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Generate the date stamp used in the misc info line of an SD file.
#
# Returns whatever TimeUtil::SDFileTimeStamp() returns; the stamp format is
# defined by that function and is not visible here.
#
sub GenerateMiscLineDateStamp {
  return TimeUtil::SDFileTimeStamp();
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
146
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Generate an atom line for the atom block of a V2000 connection table.
#
# Parameters:
#   $AtomSymbol              - atom symbol, left-justified in 3 columns
#   $AtomX, $AtomY, $AtomZ   - coordinates, each written as %10.4f
#   $MassDifference          - optional, default 0; 2-column integer
#   $Charge                  - optional, default 0; 3-column integer
#                              (presumably the MDL charge code rather than the
#                              formal charge - confirm against callers)
#   $StereoParity            - optional, default 0; 3-column integer
#
# The nine remaining atom fields are always written as 0, each in its own
# 3-column field, giving a 69 column line.
#
# Returns the formatted line.
#
sub GenerateCmpdAtomLine {
  my($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity) = @_;

  $MassDifference = 0 if (!defined $MassDifference);
  $Charge = 0 if (!defined $Charge);
  $StereoParity = 0 if (!defined $StereoParity);

  return sprintf "%10.4f%10.4f%10.4f %-3s%2i%3i%3i  0  0  0  0  0  0  0  0  0", $AtomX, $AtomY, $AtomZ, $AtomSymbol, $MassDifference, $Charge, $StereoParity;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
166
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Generate a bond line for the bond block of a V2000 connection table.
#
# Parameters:
#   $FirstAtomNum, $SecondAtomNum - atom numbers of the bonded atoms
#   $BondType                     - bond type value
#   $BondStereo                   - optional bond stereo value, default 0
#
# All four values are written as 3-column integers; the three remaining bond
# fields are always written as 0, each in its own 3-column field, giving a
# 21 column line.
#
# Returns the formatted line.
#
sub GenerateCmpdBondLine {
  my($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo) = @_;

  $BondStereo = 0 if (!defined $BondStereo);

  return sprintf "%3i%3i%3i%3i  0  0  0", $FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
180
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Generate charge property lines for a CTAB block.
#
# Forwards its first argument, together with the property name 'Charge', to
# _GenerateCmpdGenericPropertyLines() and returns that function's result.
# The argument is presumably a reference to a flat list of value pairs;
# its exact layout is defined by the generic helper.
#
sub GenerateCmpdChargePropertyLines {
  my $ValuePairsRef = shift;

  return _GenerateCmpdGenericPropertyLines('Charge', $ValuePairsRef);
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
188
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Generate isotope property lines for a CTAB block.
#
# Forwards its first argument, together with the property name 'Isotope', to
# _GenerateCmpdGenericPropertyLines() and returns that function's result.
# The argument is presumably a reference to a flat list of value pairs;
# its exact layout is defined by the generic helper.
#
sub GenerateCmpdIsotopePropertyLines {
  my $ValuePairsRef = shift;

  return _GenerateCmpdGenericPropertyLines('Isotope', $ValuePairsRef);
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
196
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Generate radical property lines for a CTAB block.
#
# Forwards its first argument, together with the property name 'Radical', to
# _GenerateCmpdGenericPropertyLines() and returns that function's result.
# The argument is presumably a reference to a flat list of value pairs;
# its exact layout is defined by the generic helper.
#
sub GenerateCmpdRadicalPropertyLines {
  my $ValuePairsRef = shift;

  return _GenerateCmpdGenericPropertyLines('Radical', $ValuePairsRef);
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
204
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Generate atom alias property lines for a CTAB block.
#
# Each alias takes two lines:
#
#   A  aaa
#   x...
#
#   aaa: atom number, right-justified in 3 columns after "A" and two blanks
#   x:   atom alias text, on the following line
#
# Parameters:
#   $PropertyValuePairsRef - reference to a flat list of
#                            (atom number, alias, atom number, alias, ...);
#                            a trailing element without a partner is ignored.
#
# Returns the generated lines as a list, two entries per pair and without
# line terminators.
#
sub GenerateCmpdAtomAliasPropertyLines {
  my($PropertyValuePairsRef) = @_;
  my($Index, @PropertyLines);

  @PropertyLines = ();

  # Stop before the last index so that an unpaired trailing element is skipped.
  for ($Index = 0; $Index < $#{$PropertyValuePairsRef}; $Index += 2) {
    my $AtomNum = $PropertyValuePairsRef->[$Index];
    my $AtomAlias = $PropertyValuePairsRef->[$Index + 1];

    push @PropertyLines, sprintf("A  %3i", $AtomNum), $AtomAlias;
  }

  return @PropertyLines;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
233
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Generate the data block lines of a compound record.
#
# Parameters:
#   $DataHeaderLabelsRef          - reference to the list of labels to write
#   $DataHeaderLabelsAndValuesRef - reference to a hash mapping label to value
#   $SortDataLabels               - optional; when true the labels are written
#                                   in default string sort order, otherwise in
#                                   the order given
#
# Returns a list with three entries per label: the header line, the value
# (an empty string when the label has no entry in the hash), and an empty
# string for the blank separator line.
#
sub GenerateCmpdDataHeaderLabelsAndValuesLines {
  my($DataHeaderLabelsRef, $DataHeaderLabelsAndValuesRef, $SortDataLabels) = @_;

  my @OrderedLabels = $SortDataLabels ? (sort @{$DataHeaderLabelsRef}) : (@{$DataHeaderLabelsRef});

  my @Lines = ();
  for my $DataLabel (@OrderedLabels) {
    my $Value = (exists $DataHeaderLabelsAndValuesRef->{$DataLabel}) ? $DataHeaderLabelsAndValuesRef->{$DataLabel} : '';
    push @Lines, "> <${DataLabel}>", "$Value", "";
  }

  return @Lines;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
261
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Scan every compound of an SD file and collect its data field labels.
#
# Parameters:
#   $SDFileRef - passed unchanged to ReadCmpdString() to fetch successive
#                compound strings; scanning stops at the first false value
#                it returns.
#
# Returns a three element list:
#   - number of compounds read
#   - reference to the list of all labels, in order of first appearance
#   - reference to the list of labels present in every compound
#
# A label counts as common only when it occurs in the first compound and in
# every later one. The first compound is recognized by its position rather
# than by an empty label list, so leading compounds without data fields no
# longer cause labels of a later compound to be reported as common.
#
sub GetAllAndCommonCmpdDataHeaderLabels {
  my($SDFileRef) = @_;
  my($CmpdCount, $CmpdString, $Label, @DataFieldLabels, @CommonDataFieldLabels, %DataFieldLabelsMap);

  $CmpdCount = 0;
  @DataFieldLabels = ();
  %DataFieldLabelsMap = ();

  CMPD: while ($CmpdString = ReadCmpdString($SDFileRef)) {
    $CmpdCount++;
    my @CmpdLines = split "\n", $CmpdString;
    my @CmpdDataFieldLabels = GetCmpdDataHeaderLabels(\@CmpdLines);

    if ($CmpdCount == 1) {
      # The first compound defines the initial candidates for common labels.
      @DataFieldLabels = @CmpdDataFieldLabels;
      for $Label (@DataFieldLabels) {
        $DataFieldLabelsMap{$Label} = "PresentInAll";
      }
      next CMPD;
    }

    my %CmpdDataFieldLabelsMap = map { $_ => 1 } @CmpdDataFieldLabels;

    # Known labels missing from this compound can no longer be common.
    for $Label (@DataFieldLabels) {
      if (!exists $CmpdDataFieldLabelsMap{$Label}) {
        $DataFieldLabelsMap{$Label} = "PresentInSome";
      }
    }

    # Labels first seen now were absent from at least one earlier compound.
    for $Label (@CmpdDataFieldLabels) {
      if (!exists $DataFieldLabelsMap{$Label}) {
        push @DataFieldLabels, $Label;
        $DataFieldLabelsMap{$Label} = "PresentInSome";
      }
    }
  }

  # Identify the common data field labels...
  @CommonDataFieldLabels = grep { $DataFieldLabelsMap{$_} eq "PresentInAll" } @DataFieldLabels;

  return ($CmpdCount, \@DataFieldLabels, \@CommonDataFieldLabels);
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
317
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Parse all the data header labels of a compound and return them as a list.
#
# Data item format in an SD file:
#
#   > Data header line
#   Data line(s)
#   Blank line
#
# The data header (one line) precedes each item of data, starts with a
# greater than (>) sign, and contains at least one of the following:
#   - the field name enclosed in angle brackets, for example <melting.point>
#   - the field number, DTn, where n is the number assigned to the field in a
#     MACCS-II database
#
# Optional information in the data header includes:
#   - the compound's external and internal registry numbers; external
#     registry numbers must be enclosed in parentheses
#   - any combination of information
#
# Examples of valid data headers:
#   > <MELTING.POINT>
#   > 55 (MD-08974) <BOILING.POINT> DT12
#   > DT12 55
#   > (MD-0894) <BOILING.POINT> FROM ARCHIVES
#
# Note: sometimes the last blank line is missing and the data is directly
# followed by $$$$.
#
# Parameters:
#   $CmpdLines - reference to the list of lines of one compound
#
# Returns the list of labels, in line order. Only lines starting with ">"
# are examined, and of those only the first angle-bracketed field is used;
# header lines without such a field contribute nothing.
#
sub GetCmpdDataHeaderLabels {
  my($CmpdLines) = @_;
  my @FoundLabels = ();

  for my $HeaderLine (@$CmpdLines) {
    next unless ($HeaderLine =~ /^>/);
    next unless ($HeaderLine =~ /<(.*?)>/);

    # Drop any angle bracket left inside the captured field name.
    my $FieldName = $1;
    $FieldName =~ tr/<>//d;

    push @FoundLabels, $FieldName;
  }

  return (@FoundLabels);
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
362
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
363 # Parse all the data header labels and values
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Collect data field names along with their values for a compound...
#
# Parameters:
#   $CmpdLines - reference to an array holding the lines of one compound.
#
# Returns a hash (as a flat list) mapping each field name to its value. The
# lines of a multi-line value are joined by a new line character; a field
# without any value lines maps to an empty string.
#
# Processing rules:
#   . A line starting with $$$$ ends the scan.
#   . A line starting with '>' and containing <name> starts a new field. Any
#     non-empty name is accepted, including a name such as 0.
#   . A line starting with '>' without <name> is ignored unless a value is
#     being collected, in which case it is treated as part of that value; this
#     keeps data such as ">10" intact.
#   . A blank line terminates the value of the current field.
#
sub GetCmpdDataHeaderLabelsAndValues {
  my($CmpdLines) = @_;
  my($CurrentLabel, $ValueCount, $ProcessingLabelData, %DataFields);

  %DataFields = ();
  $CurrentLabel = '';
  $ProcessingLabelData = 0;
  $ValueCount = 0;

  CMPDLINE: for my $CmpdLine (@$CmpdLines) {
    if ($CmpdLine =~ /^\$\$\$\$/) {
      last CMPDLINE;
    }

    if ($CmpdLine =~ /^>/) {
      # Does the line contain a field name enclosed in angular brackets?
      my($Label) = $CmpdLine =~ /(<.*?>)/;
      if (defined $Label) {
        $Label =~ tr/<>//d;
        $CurrentLabel = $Label;
        $ValueCount = 0;

        # Test the length instead of the truth of the name: a field named 0 is
        # a valid field...
        $ProcessingLabelData = length($CurrentLabel) ? 1 : 0;
        if ($ProcessingLabelData) {
          $DataFields{$CurrentLabel} = '';
        }
        next CMPDLINE;
      }
      # No <name> on this line: it's handled below like any other line...
    }

    if (!$ProcessingLabelData) {
      # Line doesn't belong to any named field. Just ignore it...
      next CMPDLINE;
    }

    if (!(defined($CmpdLine) && length($CmpdLine))) {
      # Blank line terminates value for a label...
      $CurrentLabel = '';
      $ValueCount = 0;
      $ProcessingLabelData = 0;
      next CMPDLINE;
    }

    # First value line starts the value; the others are appended to it...
    $ValueCount++;
    if ($ValueCount > 1) {
      $DataFields{$CurrentLabel} .= "\n" . $CmpdLine;
    }
    else {
      $DataFields{$CurrentLabel} = $CmpdLine;
    }
  }
  return (%DataFields);
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
419
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
420 # Return an updated compound string after removing data header label along with its
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
421 # value from the specified compound string...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
422 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Remove a data field, header line and value lines, from a compound string...
#
# Parameters:
#   $CmpdString      - compound lines joined by new line characters.
#   $DataHeaderLabel - name of the field to remove. It's matched literally and
#                      without regard to case against <name> on lines starting
#                      with '>'.
#
# Returns the compound string without the header line of the field, its value
# lines and the blank line terminating the value. A $$$$ line reached while
# skipping the value is retained.
#
sub RemoveCmpdDataHeaderLabelAndValue {
  my($CmpdString, $DataHeaderLabel) = @_;
  my($RemovingFieldData, @CmpdLines);

  @CmpdLines = ();
  $RemovingFieldData = 0;

  CMPDLINE: for my $Line (split "\n", $CmpdString) {
    # \Q...\E keeps characters such as '.', '(' and '+' in the field name from
    # being interpreted as regular expression operators...
    if ($Line =~ /^>/ && $Line =~ /<\Q$DataHeaderLabel\E>/i) {
      $RemovingFieldData = 1;
      next CMPDLINE;
    }

    if ($RemovingFieldData) {
      if ($Line =~ /^\$\$\$\$/) {
        # Blank line after the value is missing: keep the record terminator...
        push @CmpdLines, $Line;
        $RemovingFieldData = 0;
      }
      elsif (!length($Line)) {
        # Blank line indicates end of the data value...
        $RemovingFieldData = 0;
      }
      next CMPDLINE;
    }

    # Track compound lines which don't belong to the removed field...
    push @CmpdLines, $Line;
  }

  return join "\n", @CmpdLines;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
454
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
455 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
456 # Using bond blocks, figure out the number of disconnected fragments and
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
457 # return their values along with the atom numbers in a string delimited by new
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
458 # line character.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
459 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Figure out the disconnected fragments of a compound from its bond block...
#
# Parameters:
#   $CmpdLines - reference to an array holding the lines of one compound. The
#                counts line is taken from index 3, followed by the atom block
#                and then the bond block.
#
# Returns ($FragmentCount, $FragmentString). $FragmentString contains one line
# per fragment, delimited by new line character, listing the atom numbers of
# the fragment in ascending order and separated by a space. Fragments are
# listed by decreasing size; fragments of the same size are listed in the
# order of their lowest atom number, which makes the output reproducible.
#
# Every atom from 1 to the atom count ends up in exactly one fragment, even
# when bond lines refer to atom numbers outside the atom block.
#
sub GetCmpdFragments {
  my($CmpdLines) = @_;
  my($AtomCount, $BondCount, @AtomConnections, @ProcessedAtoms, %Fragments, $FragmentNum, $FragmentCount, $FragmentString);

  # Setup the connection table for each atom...
  ($AtomCount, $BondCount) = ParseCmpdCountsLine($CmpdLines->[3]);
  @AtomConnections = ();
  for my $AtomNum (1 .. $AtomCount) {
    $AtomConnections[$AtomNum] = {};
  }
  for my $LineIndex ((4 + $AtomCount) .. (3 + $AtomCount + $BondCount)) {
    my($FirstAtomNum, $SecondAtomNum, $BondType) = ParseCmpdBondLine($CmpdLines->[$LineIndex]);

    # Record the bond in both directions; first bond type seen for a pair wins...
    if (!$AtomConnections[$FirstAtomNum]{$SecondAtomNum}) {
      $AtomConnections[$FirstAtomNum]{$SecondAtomNum} = $BondType;
    }
    if (!$AtomConnections[$SecondAtomNum]{$FirstAtomNum}) {
      $AtomConnections[$SecondAtomNum]{$FirstAtomNum} = $BondType;
    }
  }

  # Get set to count fragments...
  $FragmentNum = 0;
  %Fragments = ();
  @ProcessedAtoms = ();

  SEEDATOM: for my $SeedAtomNum (1 .. $AtomCount) {
    # Each atom not reached so far starts a new fragment...
    next SEEDATOM if $ProcessedAtoms[$SeedAtomNum];

    $FragmentNum++;
    $ProcessedAtoms[$SeedAtomNum] = 1;

    my @FragmentAtoms = ($SeedAtomNum);
    my @ProcessingAtoms = ($SeedAtomNum);

    # Go over the neighbors and follow the connection trail while collecting the
    # atoms numbers present in the connected fragment...
    while (@ProcessingAtoms) {
      my $ProcessAtomNum = shift @ProcessingAtoms;
      NBRATOM: for my $NbrAtomNum (keys %{$AtomConnections[$ProcessAtomNum]}) {
        next NBRATOM if $ProcessedAtoms[$NbrAtomNum];

        $ProcessedAtoms[$NbrAtomNum] = 1;
        push @FragmentAtoms, $NbrAtomNum;
        push @ProcessingAtoms, $NbrAtomNum;
      }
    }
    $Fragments{$FragmentNum} = \@FragmentAtoms;
  }
  $FragmentCount = $FragmentNum;

  # Sort out the fragments by size; fragment number breaks the ties as order of
  # hash keys isn't fixed. Atoms in a fragment are sorted by their numbers...
  $FragmentString = join "\n",
    map { join " ", sort { $a <=> $b } @{$Fragments{$_}} }
    sort { @{$Fragments{$b}} <=> @{$Fragments{$a}} || $a <=> $b } keys %Fragments;

  return ($FragmentCount, $FragmentString);
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
538
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
539 # Count number of lines present in between 4th and line containing "M END"
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Count the Ctab atom/bond data lines of a compound...
#
# Parameters:
#   $CmpdLines - reference to an array holding the lines of one compound.
#
# Returns the number of lines between index 4 and the first later line which
# starts with anything other than a space or a digit. Zero is returned when
# no such line is found.
#
sub GetCtabLinesCount {
  my($CmpdLines) = @_;

  for my $Index (4 .. $#{$CmpdLines}) {
    # Any line after atom and bond data starting with anything other than space
    # or a digit indicates end of Ctab atom/bond data block...
    return $Index - 4 if $CmpdLines->[$Index] !~ /^[0-9 ]/;
  }

  # Ran out of lines without finding the end of the block...
  return 0;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
557
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
558 # Using atom blocks, count the number of atoms which contain special element
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
559 # symbols not present in the periodic table.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Find atoms in the atom block whose symbols IsElement doesn't accept...
#
# Parameters:
#   $CmpdLines - reference to an array holding the lines of one compound. The
#                counts line is taken from index 3 and the atom lines start at
#                index 4.
#
# Returns ($UnknownAtomCount, $UnknownAtoms, $UnknownAtomLines): number of
# rejected atoms, their symbols each preceded by a space, and the
# corresponding atom lines delimited by new line character.
#
sub GetUnknownAtoms {
  my($CmpdLines) = @_;
  my($Count, $Symbols, $Lines) = (0, "", "");

  my($AtomCount) = ParseCmpdCountsLine($CmpdLines->[3]);

  ATOMLINE: for my $Index (4 .. (3 + $AtomCount)) {
    my($Symbol) = ParseCmpdAtomLine($CmpdLines->[$Index]);
    next ATOMLINE if IsElement($Symbol);

    $Count++;
    $Symbols .= " $Symbol";

    # First line is stored as it is; others are appended on a new line...
    my $AtomLine = $CmpdLines->[$Index];
    $Lines = $Lines ? $Lines . "\n" . $AtomLine : $AtomLine;
  }
  return ($Count, $Symbols, $Lines);
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
583
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
584 # Check z coordinates of all atoms to see whether any of them is non-zero
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
585 # which makes the compound geometry three dimensional...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
586 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Is any atom of the compound placed off the z = 0 plane?
#
# Parameters:
#   $CmpdLines - reference to an array holding the lines of one compound. The
#                counts line is taken from index 3 and the atom lines start at
#                index 4.
#
# Returns 1 as soon as an atom line with a non-zero z coordinate is seen;
# otherwise, 0.
#
sub IsCmpd3D {
  my($CmpdLines) = @_;

  my($AtomCount) = ParseCmpdCountsLine($CmpdLines->[3]);
  my $LastAtomLineIndex = 3 + $AtomCount;

  for my $Index (4 .. $LastAtomLineIndex) {
    # Atom line is parsed into symbol, x, y and z; only z matters here...
    my $ZCoordinate = (ParseCmpdAtomLine($CmpdLines->[$Index]))[3];
    return 1 if $ZCoordinate != 0;
  }
  return 0;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
600
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
601 # Check whether it's a 2D compound...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
602 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# A compound is considered 2D whenever IsCmpd3D doesn't consider it 3D...
#
# Parameters:
#   $CmpdLines - reference to an array holding the lines of one compound.
#
# Returns 1 for a 2D compound; otherwise, 0.
#
sub IsCmpd2D {
  my($CmpdLines) = @_;

  return 0 if IsCmpd3D($CmpdLines);
  return 1;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
608
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
609 # Using bond blocks, count the number of bond lines which contain atom numbers
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
610 # greater than atom count specified in compound count line...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
611 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Find atom numbers in bond and property lines which are greater than the
# atom count specified on the counts line...
#
# Parameters:
#   $CmpdLines - reference to an array holding the lines of one compound. The
#                counts line is taken from index 3, followed by atom block,
#                bond block and property lines.
#
# Returns ($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines):
# number of offending atom numbers, the numbers themselves each preceded by a
# space, and the lines containing them delimited by new line character.
#
# Notes:
#   . Only the upper limit is checked; atom numbers less than 1 are not reported.
#   . Charge (M  CHG), radical (M  RAD), isotope (M  ISO) and atom alias (A  )
#     property lines are checked up to the M  END line. Any amount of white
#     space is accepted after the leading M.
#   . For an atom alias, the line following the A line is handed to the parser
#     along with it and is skipped afterwards.
#
sub GetInvalidAtomNumbers {
  my($CmpdLines) = @_;
  my($AtomCount, $BondCount, $InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines);

  ($AtomCount, $BondCount) = ParseCmpdCountsLine($CmpdLines->[3]);

  $InvalidAtomNumbersCount = 0;
  $InvalidAtomNumbers = "";
  $InvalidAtomNumberLines = "";

  # Go over bond block lines...
  BONDLINE: for my $LineIndex ((4 + $AtomCount) .. (3 + $AtomCount + $BondCount)) {
    my($FirstAtomNum, $SecondAtomNum) = ParseCmpdBondLine($CmpdLines->[$LineIndex]);
    if ($FirstAtomNum <= $AtomCount && $SecondAtomNum <= $AtomCount) {
      next BONDLINE;
    }
    for my $AtomNum ($FirstAtomNum, $SecondAtomNum) {
      if ($AtomNum > $AtomCount) {
        $InvalidAtomNumbersCount++;
        $InvalidAtomNumbers .= " $AtomNum";
      }
    }
    $InvalidAtomNumberLines = $InvalidAtomNumberLines ? $InvalidAtomNumberLines . "\n" . $CmpdLines->[$LineIndex] : $CmpdLines->[$LineIndex];
  }

  # Go over property lines before M  END...
  #
  PROPERTYLINE: for (my $LineIndex = (4 + $AtomCount + $BondCount); $LineIndex < @$CmpdLines; $LineIndex++) {
    my $Line = $CmpdLines->[$LineIndex];
    if ($Line =~ /^M\s+END/i) {
      last PROPERTYLINE;
    }

    # Parsers hand back a flat list of atom number and value pairs...
    my @ValuePairs = ();
    if ($Line =~ /^M\s+CHG/i) {
      @ValuePairs = ParseCmpdChargePropertyLine($Line);
    }
    elsif ($Line =~ /^M\s+RAD/i) {
      @ValuePairs = ParseCmpdRadicalPropertyLine($Line);
    }
    elsif ($Line =~ /^M\s+ISO/i) {
      @ValuePairs = ParseCmpdIsotopePropertyLine($Line);
    }
    elsif ($Line =~ /^A /i) {
      # Alias text lives on the next line; consume it along with the A line...
      $LineIndex++;
      my $NextLine = $CmpdLines->[$LineIndex];
      @ValuePairs = ParseCmpdAtomAliasPropertyLine($Line, $NextLine);
    }
    else {
      next PROPERTYLINE;
    }

    my $InvalidAtomPropertyLine = 0;
    for (my $ValuePairIndex = 0; $ValuePairIndex < $#ValuePairs; $ValuePairIndex += 2) {
      # Only the atom number of the pair is of interest...
      my $AtomNum = $ValuePairs[$ValuePairIndex];
      if ($AtomNum > $AtomCount) {
        $InvalidAtomPropertyLine = 1;
        $InvalidAtomNumbersCount++;
        $InvalidAtomNumbers .= " $AtomNum";
      }
    }
    if ($InvalidAtomPropertyLine) {
      $InvalidAtomNumberLines = $InvalidAtomNumberLines ? $InvalidAtomNumberLines . "\n" . $Line : $Line;
    }
  }

  return ($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines);
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
692
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
693 # Ctab lines: Atom block
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
694 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
695 # Format: xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz aaaddcccssshhhbbbvvvHHHrrriiimmmnnneee
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
696 # A10 A10 A10 xA3 A2A3 A3 A3 A3 A3 A3 A3 A3 A3 A3 A3
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
697 # x,y,z: Atom coordinates
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
698 # aaa: Atom symbol. Entry in periodic table or L for atom list, A, Q, * for unspecified
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
699 # atom, and LP for lone pair, or R# for Rgroup label
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
700 # dd: Mass difference. -3, -2, -1, 0, 1, 2, 3, 4 (0 for value beyond these limits)
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
701 # ccc: Charge. 0 = uncharged or value other than these, 1 = +3, 2 = +2, 3 = +1,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
702 # 4 = doublet radical, 5 = -1, 6 = -2, 7 = -3
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
703 # sss: Atom stereo parity. 0 = not stereo, 1 = odd, 2 = even, 3 = either or unmarked stereo center
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
704 # hhh: Hydrogen count + 1. 1 = H0, 2 = H1, 3 = H2, 4 = H3, 5 = H4
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
705 # bbb: Stereo care box. 0 = ignore stereo configuration of this double bond atom, 1 = stereo
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
706 # configuration of double bond atom must match
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
707 # vvv: Valence. 0 = no marking (default), (1 to 14) = (1 to 14), 15 = zero valence
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
708 # HHH: H0 designator. 0 = not specified, 1 = no H atoms allowed (redundant due to hhh)
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
709 # rrr: Not used
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
710 # iii: Not used
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
711 # mmm: Atom-atom mapping number. 1 - number of atoms
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
712 # nnn: Inversion/retention flag. 0 = property not applied, 1 = configuration is inverted,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
713 # 2 = configuration is retained.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
714 # eee: Exact change flag. 0 = property not applied, 1 = change on atom must be
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
715 # exactly as shown
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
716 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
717 # Notes:
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
718 # . StereoParity: 1 - ClockwiseStereo, 2 - AntiClockwiseStereo; 3 - Either; 0 - none. These
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
719 # values determine chirality around the chiral center; a non-zero value indicates atom
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
720 # has been marked as chiral center.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
721 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdAtomLine {
  # Split a fixed-width Ctab atom block line into its leading fields.
  #
  # Parameters:
  #   $Line - Atom block line.
  #
  # Returns a list:
  #   ($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity)
  #
  # Fields are extracted using "A" unpack format: trailing blanks are stripped and
  # leading blanks are retained. For a line which is too short to contain anything
  # beyond the coordinates, atom symbol, mass difference, charge and stereo parity
  # are returned as empty strings.
  #
  my($Line) = @_;

  # Defaults for fields which may not be present on a short line...
  my($AtomX, $AtomY, $AtomZ, $AtomSymbol, $MassDifference, $Charge, $StereoParity) = ('') x 7;

  if (length($Line) > 31) {
    # Coordinates, one skipped blank column, atom symbol, mass difference, charge and stereo parity...
    ($AtomX, $AtomY, $AtomZ, $AtomSymbol, $MassDifference, $Charge, $StereoParity) = unpack("A10A10A10xA3A2A3A3", $Line);
  }
  else {
    # Only coordinates are available. Assign just these three values: including atom
    # symbol in this list assignment would reset it to undef, as unpack returns only
    # three values for this template.
    ($AtomX, $AtomY, $AtomZ) = unpack("A10A10A10", $Line);
  }

  return ($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity);
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
735
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
736 # Map MDL charge value used in SD and MOL files to internal charge used by MayaChemTools.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
737 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
sub MDLChargeToInternalCharge {
  # Convert charge value used in atom block of SD and MOL files into a signed
  # internal charge.
  #
  # Parameters:
  #   $MDLCharge - MDL charge value; compared numerically.
  #
  # Returns:
  #   Internal charge: 0, 3, 2, 1, -1, -2 or -3 for MDL values 0, 1, 2, 3, 5, 6
  #   or 7; 0 for everything else.
  #
  my($MDLCharge) = @_;
  my($Entry, @ChargeMap);

  # Pairs of MDL charge value and internal charge, checked in this order...
  @ChargeMap = ([0, 0], [1, 3], [2, 2], [3, 1], [5, -1], [6, -2], [7, -3]);

  for $Entry (@ChargeMap) {
    return $Entry->[1] if ($MDLCharge == $Entry->[0]);
  }

  # MDL value 4, doublet radical, is quietly mapped to 0; any other unmatched
  # value is mapped to 0 as well, but with a warning...
  if ($MDLCharge != 4) {
    carp "Warning: MDLChargeToInternalCharge: MDL charge value, $MDLCharge, is not supported: An internal charge value, 0, has been assigned...";
  }
  return 0;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
759
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
760 # Map internal charge used by MayaChemTools to MDL charge value used in SD and MOL files.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
761 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
sub InternalChargeToMDLCharge {
  # Convert a signed internal charge into charge value used in atom block of SD
  # and MOL files.
  #
  # Parameters:
  #   $InternalCharge - Internal charge; compared numerically.
  #
  # Returns:
  #   MDL charge value: 1, 2, 3, 5, 6 or 7 for internal charges 3, 2, 1, -1, -2
  #   or -3; 0 for any other internal charge, including 0.
  #
  my($InternalCharge) = @_;

  return ($InternalCharge == 3)  ? 1
       : ($InternalCharge == 2)  ? 2
       : ($InternalCharge == 1)  ? 3
       : ($InternalCharge == -1) ? 5
       : ($InternalCharge == -2) ? 6
       : ($InternalCharge == -3) ? 7
       :                           0;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
779
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
780 # Ctab lines: Bond block
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
781 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
782 # Format: 111222tttsssxxxrrrccc
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
783 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
784 # 111: First atom number.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
785 # 222: Second atom number.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
786 # ttt: Bond type. 1 = Single, 2 = Double, 3 = Triple, 4 = Aromatic, 5 = Single or Double,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
787 # 6 = Single or Aromatic, 7 = Double or Aromatic, 8 = Any
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
788 # sss: Bond stereo. Single bonds: 0 = not stereo, 1 = Up, 4 = Either, 6 = Down,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
789 # Double bonds: 0 = Use x-, y-, z-coords from atom block to determine cis or trans,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
790 # 3 = Cis or trans (either) double bond
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
791 # xxx: Not used
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
792 # rrr: Bond topology. 0 = Either, 1 = Ring, 2 = Chain
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
793 # ccc: Reacting center status. 0 = unmarked, 1 = a center, -1 = not a center,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
794 # Additional: 2 = no change, 4 = bond made/broken, 8 = bond order changes, 12 = 4+8
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
795 # (both made/broken and changes); 5 = (4 + 1), 9 = (8 + 1), and 13 = (12 + 1) are also possible
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
796 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdBondLine {
  # Split a fixed-width Ctab bond block line into its first four fields.
  #
  # Parameters:
  #   $Line - Bond block line.
  #
  # Returns a list:
  #   ($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo)
  #
  # Each value is taken from a three character wide column and has all blank
  # characters removed.
  #
  my($Line) = @_;
  my($Field);

  my($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo) = unpack("A3A3A3A3", $Line);

  # Drop padding used to right justify values in their columns...
  for $Field ($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo) {
    $Field =~ s/ //g;
  }

  return ($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo);
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
804
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
805 # Map MDL bond type value used in SD and MOL files to internal bond order and bond types
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
806 # values used by MayaChemTools...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
807 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
sub MDLBondTypeToInternalBondOrder {
  # Convert bond type value used in bond block of SD and MOL files into an
  # internal bond order and bond type pair.
  #
  # Parameters:
  #   $MDLBondType - MDL bond type value; compared numerically.
  #
  # Returns a list:
  #   ($InternalBondOrder, $InternalBondType)
  #
  # Notes:
  #   . MDL aromatic bond type, 4, is assigned an internal bond order of 1.5.
  #   . MDL bond types 5 to 8 are assigned bond order of the first alternative they
  #     name - 1 for SingleOrDouble, SingleOrAromatic and Any; 2 for DoubleOrAromatic -
  #     and the internal bond type keeps track of the original MDL value.
  #   . Any other value is mapped to a single bond, (1, 'Single'), with a warning.
  #
  my($MDLBondType) = @_;
  my($Entry, @BondTypeMap);

  # MDL bond type, internal bond order and internal bond type...
  @BondTypeMap = (
    [1, 1,   'Single'],
    [2, 2,   'Double'],
    [3, 3,   'Triple'],
    [4, 1.5, 'Aromatic'],
    [5, 1,   'SingleOrDouble'],
    [6, 1,   'SingleOrAromatic'],
    [7, 2,   'DoubleOrAromatic'],
    [8, 1,   'Any'],
  );

  for $Entry (@BondTypeMap) {
    my($MDLValue, $InternalBondOrder, $InternalBondType) = @{$Entry};
    return ($InternalBondOrder, $InternalBondType) if ($MDLBondType == $MDLValue);
  }

  # The warning reports bond order value of 1: it's the value which is actually assigned...
  carp "Warning: MDLBondTypeToInternalBondOrder: MDL bond type value, $MDLBondType, is not supported: An internal bond order value, 1, has been assigned...";

  return (1, 'Single');
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
838
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
839 # Map internal bond order and bond type values used by MayaChemTools to MDL bond type value used
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
840 # in SD and MOL files...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
841 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
sub InternalBondOrderToMDLBondType {
  # Convert an internal bond order and bond type pair into bond type value used
  # in bond block of SD and MOL files.
  #
  # Parameters:
  #   $InternalBondOrder - Internal bond order; compared numerically.
  #   $InternalBondType  - Internal bond type name; matched case insensitively.
  #                        An undefined value is treated as an empty string.
  #
  # Returns:
  #   MDL bond type value:
  #     Bond order 1:   5 for SingleOrDouble, 6 for SingleOrAromatic, 8 for Any; otherwise 1
  #     Bond order 2:   7 for DoubleOrAromatic; otherwise 2
  #     Bond order 3:   3
  #     Bond order 1.5: 4
  #     Any other bond order: 8 for bond type Any; otherwise 1 along with a warning
  #
  my($InternalBondOrder, $InternalBondType) = @_;

  $InternalBondType = '' unless defined $InternalBondType;

  if ($InternalBondOrder == 1) {
    # Return the query bond type as soon as it's identified: resetting it to 1 after
    # these checks would make it impossible to write out MDL bond types 5, 6 and 8.
    return 5 if ($InternalBondType =~ /^SingleOrDouble$/i);
    return 6 if ($InternalBondType =~ /^SingleOrAromatic$/i);
    return 8 if ($InternalBondType =~ /^Any$/i);
    return 1;
  }

  if ($InternalBondOrder == 2) {
    return ($InternalBondType =~ /^DoubleOrAromatic$/i) ? 7 : 2;
  }

  return 3 if ($InternalBondOrder == 3);
  return 4 if ($InternalBondOrder == 1.5);
  return 8 if ($InternalBondType =~ /^Any$/i);

  carp "Warning: InternalBondOrderToMDLBondType: Internal bond order and type values, $InternalBondOrder and $InternalBondType, don't match any valid MDL bond type: MDL bond type value, 1, has been assigned...";

  return 1;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
882
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
883 # Third line: Comments - A blank line is also allowed.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdCommentsLine {
  # Retrieve comments from the comments line of a compound.
  #
  # Parameters:
  #   $Line - Comments line.
  #
  # Returns:
  #   At most the first 80 characters of the line without any trailing blanks.
  #
  my $Line = shift;

  return scalar(unpack("A80", $Line));
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
892
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
893 # Map MDL bond stereo value used in SD and MOL files to internal bond stereochemistry values used by MayaChemTools...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
894 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
sub MDLBondStereoToInternalBondStereochemistry {
  # Convert bond stereo value used in bond block of SD and MOL files into an
  # internal bond stereochemistry name.
  #
  # Parameters:
  #   $MDLBondStereo - MDL bond stereo value; compared numerically.
  #
  # Returns:
  #   'Up', 'UpOrDown', 'Down', 'CisOrTrans' or 'None' for MDL values 1, 4, 6, 3
  #   or 0; an empty string, along with a warning, for any other value.
  #
  my($MDLBondStereo) = @_;
  my($Entry, @BondStereoMap);

  # Pairs of MDL bond stereo value and internal name, checked in this order...
  @BondStereoMap = ([1, 'Up'], [4, 'UpOrDown'], [6, 'Down'], [3, 'CisOrTrans'], [0, 'None']);

  for $Entry (@BondStereoMap) {
    return $Entry->[1] if ($MDLBondStereo == $Entry->[0]);
  }

  carp "Warning: MDLBondStereoToInternalBondType: MDL bond stereo value, $MDLBondStereo, is not supported: It has been ignored and bond order would be used to determine bond type...";

  return '';
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
913
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
914 # Map internal bond stereochemistry values used by MayaChemTools to MDL bond stereo value used in SD and MOL files...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
915 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
sub InternalBondStereochemistryToMDLBondStereo {
  # Convert an internal bond stereochemistry name into bond stereo value used in
  # bond block of SD and MOL files.
  #
  # Parameters:
  #   $InternalBondStereo - Internal bond stereochemistry name. An undefined value
  #                         is treated as an empty string.
  #
  # Returns:
  #   MDL bond stereo value: 1, 4, 6 or 3 for Up, UpOrDown, Down or CisOrTrans;
  #   0 for any other name.
  #
  my($InternalBondStereo) = @_;

  $InternalBondStereo = '' unless defined $InternalBondStereo;

  # All names are matched without regard to case: Down and CisOrTrans are treated
  # the same way as Up and UpOrDown.
  return 1 if ($InternalBondStereo =~ /^Up$/i);
  return 4 if ($InternalBondStereo =~ /^UpOrDown$/i);
  return 6 if ($InternalBondStereo =~ /^Down$/i);
  return 3 if ($InternalBondStereo =~ /^CisOrTrans$/i);

  return 0;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
932
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
933 # Fourth line: Counts
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
934 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
935 # Format: aaabbblllfffcccsssxxxrrrpppiiimmmvvvvvv
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
936 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
937 # aaa: number of atoms; bbb: number of bonds; lll: number of atom lists; fff: (obsolete)
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
938 # ccc: chiral flag: 0=not chiral, 1=chiral; sss: number of stext entries; xxx,rrr,ppp,iii:
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
939 # (obsolete); mmm: number of lines of additional properties, including the M END line, No
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
940 # longer supported, default is set to 999; vvvvvv: version
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
941
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdCountsLine {
  # Split the fixed-width counts line of a compound into the fields of interest.
  #
  # Parameters:
  #   $Line - Counts line.
  #
  # Returns a list:
  #   ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version)
  #
  # Fields which are not available on a short line are set to these defaults:
  # chiral flag "0", property count "999" and version "v2000". Processing is
  # terminated with an error for a line containing V3000 as its version.
  #
  my($Line) = @_;
  my($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version, $LineLength);

  ($ChiralFlag, $PropertyCount, $Version) = ("0", "999", "v2000");
  $LineLength = length($Line);

  if ($LineLength >= 39) {
    # Complete line: skip over atom lists, obsolete and stext entries...
    ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version) = unpack("A3A3x3x3A3x3x3x3x3x3A3A6", $Line);
  }
  elsif ($LineLength >= 15) {
    # No property count and version available...
    ($AtomCount, $BondCount, $ChiralFlag) = unpack("A3A3x3x3A3", $Line);
  }
  else {
    # Only atom and bond counts are available...
    ($AtomCount, $BondCount) = unpack("A3A3", $Line);
  }

  # V3000 format is not handled: stop here instead of relying on callers to check version...
  if ($Version =~ /V3000/i) {
    croak "Error: SDFileUtil::ParseCmpdCountsLine: The Extended Connection Table (V3000) format in MDL MOL and SD files is not supported by the current release of MayaChemTools...";
  }

  return ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version);
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
967
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
968 # Second line: Misc info
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
969 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
970 # Format: IIPPPPPPPPMMDDYYHHmmddSSssssssssssEEEEEEEEEEEERRRRRR
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
971 # A2A8 A10 A2I2A10 A12 A6
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
972 # User's first and last initials (I), program name (P), date/time (M/D/Y,H:m),
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
973 # dimensional codes - 2D or 3D (d),scaling factors (S, s), energy (E) if modeling program input,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
974 # internal registry number (R) if input through MDL form. A blank line is also allowed.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdMiscInfoLine {
  # Split the fixed-width misc info line of a compound into its fields.
  #
  # Parameters:
  #   $Line - Misc info line.
  #
  # Returns a list:
  #   ($UserInitial, $ProgramName, $Date, $Code, $ScalingFactor1, $ScalingFactor2,
  #   $Energy, $RegistryNum)
  #
  # Column widths used are 2, 8, 10, 2, 2, 10, 12 and 6; trailing blanks are
  # stripped from each value.
  #
  my($Line) = @_;
  my(@Fields);

  @Fields = unpack("A2A8A10A2A2A10A12A6", $Line);

  return @Fields[0 .. 7];
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
982
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
983 # First line: Molecule name. This line is unformatted, but like all other lines in a
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
984 # molfile may not extend beyond column 80. A blank line is also allowed.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdMolNameLine {
  # Retrieve molecule name from the molecule name line of a compound.
  #
  # Parameters:
  #   $Line - Molecule name line.
  #
  # Returns:
  #   At most the first 80 characters of the line without any trailing blanks.
  #
  my $Line = shift;

  return scalar(unpack("A80", $Line));
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
993
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
994 # Parse atom alias property line in CTAB generic properties block.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
995 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
996 # Atom alias property line format:
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
997 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
998 # A aaa
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
999 # x...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1000 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1001 # aaa: Atom number
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1002 # x: Atom alias in next line
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1003 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdAtomAliasPropertyLine {
  # Retrieve atom number and atom alias from an atom alias property line and
  # the line following it.
  #
  # Parameters:
  #   $Line     - Atom alias property line; its second whitespace delimited value
  #               is the atom number.
  #   $NextLine - Line following the atom alias property line; it's used unchanged
  #               as the atom alias.
  #
  # Returns a list:
  #   ($AtomNumber, $AtomAlias)
  #
  # A warning is issued for an undefined or empty atom alias; the values are
  # returned regardless.
  #
  my($Line, $NextLine) = @_;
  my($Label, $AtomNumber, $AtomAlias);

  ($Label, $AtomNumber) = split(' ', $Line);
  $AtomAlias = $NextLine;

  # Check for a missing value instead of a false value: an atom alias of "0" is
  # a specified value and must not trigger the warning.
  if (!defined($AtomAlias) || $AtomAlias eq '') {
    carp "Warning: _ParseCmpdAtomAliasPropertyLine: No atom alias value specified on the line following atom alias property line...";
  }

  return ($AtomNumber, $AtomAlias);
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1017
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Parse charge property line in CTAB generic properties block.
#
# Charge property line format:
#
# M CHGnn8 aaa vvv ...
#
# nn8: Number of value pairs. Maximum of 8 pairs allowed.
# aaa: Atom number
# vvv: -15 to +15. Default of 0 = uncharged atom. When present, this property
#      supersedes all charge and radical values in the atom block, forcing a 0
#      charge on all atoms not listed in an M CHG or M RAD line.
#
# Returns whatever _ParseCmpdGenericPropertyLine returns for the line: a flat
# list of atom number and charge value pairs.
#
sub ParseCmpdChargePropertyLine {
  my $ChargeLine = shift;

  my @AtomNumAndChargePairs = _ParseCmpdGenericPropertyLine('Charge', $ChargeLine);

  return @AtomNumAndChargePairs;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1035
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1036
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Parse isotope property line in CTAB generic properties block.
#
# Isotope property line format:
#
# M ISOnn8 aaa vvv ...
#
# nn8: Number of value pairs. Maximum of 8 pairs allowed.
# aaa: Atom number
# vvv: Absolute mass of the atom isotope as a positive integer. When present,
#      this property supersedes all isotope values in the atom block. Default
#      (no entry) means natural abundance. The difference between this absolute
#      mass value and the natural abundance value specified in the PTABLE.DAT
#      file must be within the range of -18 to +12.
#
# Notes:
#   . Values correspond to mass numbers...
#
# Returns whatever _ParseCmpdGenericPropertyLine returns for the line: a flat
# list of atom number and isotope value pairs.
#
sub ParseCmpdIsotopePropertyLine {
  my $IsotopeLine = shift;

  my @AtomNumAndIsotopePairs = _ParseCmpdGenericPropertyLine('Isotope', $IsotopeLine);

  return @AtomNumAndIsotopePairs;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1059
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Parse radical property line in CTAB generic properties block.
#
# Radical property line format:
#
# M RADnn8 aaa vvv ...
#
# nn8: Number of value pairs. Maximum of 8 pairs allowed.
# aaa: Atom number
# vvv: Default of 0 = no radical, 1 = singlet, 2 = doublet, 3 = triplet. When
#      present, this property supersedes all charge and radical values in the
#      atom block, forcing a 0 (zero) charge and radical on all atoms not
#      listed in an M CHG or M RAD line.
#
# Returns whatever _ParseCmpdGenericPropertyLine returns for the line: a flat
# list of atom number and radical value pairs.
#
sub ParseCmpdRadicalPropertyLine {
  my $RadicalLine = shift;

  my @AtomNumAndRadicalPairs = _ParseCmpdGenericPropertyLine('Radical', $RadicalLine);

  return @AtomNumAndRadicalPairs;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1078
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Map MDL radical stereo value used in SD and MOL files to internal spin
# multiplicity values used by MayaChemTools...
#
# MDL radical values of 0, 1, 2 and 3 are mapped to the same number. For any
# other value, a warning is issued via carp and an empty string is returned.
#
sub MDLRadicalToInternalSpinMultiplicity {
  my($MDLRadical) = @_;

  # Values are compared numerically, in increasing order; first match wins...
  for my $SupportedValue (0, 1, 2, 3) {
    return $SupportedValue if $MDLRadical == $SupportedValue;
  }

  carp "Warning: MDLRadicalToInternalSpinMultiplicity: MDL radical value, $MDLRadical, specifed on line M RAD is not supported...";

  return '';
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1097
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Map internal spin multiplicity values used by MayaChemTools to MDL radical
# stereo value used in SD and MOL files...
#
# Spin multiplicity values of 1, 2 and 3 are mapped to the same number; any
# other value is mapped to 0.
#
sub InternalSpinMultiplicityToMDLRadical {
  my($InternalSpinMultiplicity) = @_;

  # Values are compared numerically, in increasing order; first match wins...
  foreach my $Multiplicity (1, 2, 3) {
    if ($InternalSpinMultiplicity == $Multiplicity) {
      return $Multiplicity;
    }
  }

  return 0;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1114
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Process generic CTAB property line...
#
# Parameters:
#   $PropertyName - property name used in the warning message, e.g. 'Charge'
#   $Line         - property line; it is split on whitespace
#
# The first three fields on the line are taken as label, property label and
# count of value pairs. All remaining fields are returned as a flat list:
# (AtomNum1, Value1, AtomNum2, Value2, ...). A warning is issued via carp when
# half the number of remaining fields differs from the count on the line; the
# list is still returned as is.
#
sub _ParseCmpdGenericPropertyLine {
  my($PropertyName, $Line) = @_;

  my @Fields = split(' ', $Line);

  # Peel off the three leading fields; whatever is left makes up the pairs...
  my($Label, $PropertyLabel, $ValuesCount) = splice(@Fields, 0, 3);
  my @ValuePairs = @Fields;

  my $ValuePairsCount = scalar(@ValuePairs) / 2;
  if ($ValuesCount != $ValuePairsCount) {
    carp "Warning: _ParseCmpdGenericPropertyLine: Number of atom number and $PropertyName value paris specified on $Label $PropertyLabel property line, $ValuePairsCount, does not match expected value of $ValuesCount...";
  }

  return (@ValuePairs);
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1130
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Generic CTAB property lines for charge, isotope and radical properties...
#
# Parameters:
#   $PropertyName          - 'Charge', 'Isotope' or 'Radical' (case insensitive)
#   $PropertyValuePairsRef - reference to a flat array of atom number and
#                            property value pairs: [AtomNum1, Value1, ...]
#
# Returns a list of property lines, each holding at most 8 value pairs and
# starting with the property label followed by the number of pairs on that
# line. For an unknown property name, a warning is issued via carp and an
# empty list is returned. A trailing atom number without a value is ignored.
#
# NOTE(review): confirm the spacing of the labels and of the count field
# against the CTfile specification (M CHGnn8 aaa vvv ... layout).
#
sub _GenerateCmpdGenericPropertyLines {
  my($PropertyName, $PropertyValuePairsRef) = @_;
  my($PropertyLabel, @PropertyLines);

  @PropertyLines = ();
  NAME: {
    if ($PropertyName =~ /^Charge$/i) { $PropertyLabel = "M CHG"; last NAME; }
    if ($PropertyName =~ /^Isotope$/i) { $PropertyLabel = "M ISO"; last NAME; }
    if ($PropertyName =~ /^Radical$/i) { $PropertyLabel = "M RAD"; last NAME; }
    carp "Warning: _GenerateCmpdGenericPropertyLines: Unknown property name, $PropertyName, specified...";
    return @PropertyLines;
  }

  # A maximum of 8 property pair values allowed per line...
  my $MaxPairsPerLine = 8;
  my $PropertyCount = 0;
  my $Line = '';
  for (my $Index = 0; $Index < $#{$PropertyValuePairsRef}; $Index += 2) {
    # Start a new line as soon as the current one is full; checking with >=
    # keeps a ninth pair from slipping onto a line labeled with a count of 8...
    if ($PropertyCount >= $MaxPairsPerLine) {
      push @PropertyLines, "${PropertyLabel} ${PropertyCount}${Line}";

      $PropertyCount = 0;
      $Line = '';
    }
    $PropertyCount++;
    my $AtomNum = $PropertyValuePairsRef->[$Index];
    my $PropertyValue = $PropertyValuePairsRef->[$Index + 1];
    $Line .= sprintf " %3i %3i", $AtomNum, $PropertyValue;
  }

  # Write out any partially filled last line...
  if ($Line) {
    push @PropertyLines, "${PropertyLabel} ${PropertyCount}${Line}";
  }
  return @PropertyLines;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1169
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
#
# Read compound data into a string and return its value
#
# Parameters:
#   $SDFileRef - file handle to read lines from
#
# Lines are read up to and including the first line starting with $$$$, or up
# to end of file. Windows and Mac new line chars are changed to UNIX ones. The
# new line char at the end of the $$$$ line is dropped; all other lines keep
# theirs. An empty string is returned when nothing is left to read.
#
# A lexical variable holds the current line so that the caller's $_ is left
# untouched.
#
sub ReadCmpdString {
  my($SDFileRef) = @_;
  my($CmpdString);

  $CmpdString = "";
  LINE: while (defined(my $Line = <$SDFileRef>)) {
    # Change Windows and Mac new line char to UNIX...
    $Line =~ s/\r\n?/\n/g;

    if ($Line =~ /^\$\$\$\$/) {
      # Take out any new line char at the end by explicitly removing it instead of using
      # chomp, which might not always work correctly on files generated on a system
      # with a value of input line separator different from the current system...
      $Line =~ s/\n$//g;

      # Doesn't hurt to chomp...
      chomp $Line;

      $CmpdString .= $Line;
      last LINE;
    }
    $CmpdString .= $Line;
  }
  return $CmpdString;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1199
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Find out the number of fragments in the compound. And for a compound with
# more than one fragment, remove all the others besides the largest one.
#
# Parameters:
#   $CmpdLines - reference to an array of compound lines
#
# Returns the list ($FragmentCount, $Fragments, $WashedCmpdString). The first
# two values come from GetCmpdFragments. $WashedCmpdString is an empty string
# unless the fragment count is greater than 1; otherwise it holds the lines of
# the retained fragment joined by new line chars.
#
# The retained fragment is the one on the first line of $Fragments, which is
# taken to be the largest one - TODO confirm against GetCmpdFragments.
#
sub WashCmpd {
  my($CmpdLines) = @_;

  my($FragmentCount, $Fragments) = GetCmpdFragments($CmpdLines);
  unless ($FragmentCount > 1) {
    # Nothing to wash...
    return ($FragmentCount, $Fragments, "");
  }

  my @AllFragments = split "\n", $Fragments;
  my @LargestFragment = split " ", $AllFragments[0];

  # Map old atom numbers to new atom numbers: the new number is the position
  # of the old one in the fragment atom list, starting from 1...
  my %NewAtomNumOf = ();
  my $NewAtomNum = 0;
  for my $FragmentAtomNum (@LargestFragment) {
    $NewAtomNumOf{$FragmentAtomNum} = ++$NewAtomNum;
  }

  # Carry over the first four lines; the counts line is replaced at the end...
  my @WashedCmpdLines = ();
  push @WashedCmpdLines, @{$CmpdLines}[0 .. 3];

  my($AtomCount, $BondCount, $ChiralFlag) = ParseCmpdCountsLine(@{$CmpdLines}[3]);
  my $NewAtomCount = scalar @LargestFragment;
  my $NewBondCount = 0;

  # Retrieve the largest fragment atom lines...
  my $LineIndex;
  my $OldAtomNum = 0;
  for ($LineIndex = 4; $LineIndex < (4 + $AtomCount); $LineIndex++) {
    $OldAtomNum++;
    next unless $NewAtomNumOf{$OldAtomNum};
    push @WashedCmpdLines, $CmpdLines->[$LineIndex];
  }

  # Retrieve the largest fragment bond lines; a bond is kept only when both of
  # its atoms are in the fragment...
  for ($LineIndex = 4 + $AtomCount; $LineIndex < (4 + $AtomCount + $BondCount); $LineIndex++) {
    my($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo) = ParseCmpdBondLine(@{$CmpdLines}[$LineIndex]);
    if ($NewAtomNumOf{$FirstAtomNum} && $NewAtomNumOf{$SecondAtomNum}) {
      $NewBondCount++;
      # Set up bond line with new atom number mapping...
      my $BondLine = GenerateCmpdBondLine($NewAtomNumOf{$FirstAtomNum}, $NewAtomNumOf{$SecondAtomNum}, $BondType, $BondStereo);
      push @WashedCmpdLines, $BondLine;
    }
  }

  # Get property lines for CHG, RAD, ISO and atom alias labels and map the old
  # atom numbers to new atom numbers; other property lines before M END line
  # are skipped as atom numbers for other properties might not be valid anymore...
  #
  # M END line index defaults to the line after the bond block in case no
  # M END line is found...
  my $MENDLineIndex = $LineIndex;
  LINE: for ($LineIndex = (4 + $AtomCount + $BondCount); $LineIndex < @$CmpdLines; $LineIndex++) {
    my $Line = $CmpdLines->[$LineIndex];
    if ($Line =~ /^M END/i) {
      push @WashedCmpdLines, "M END";
      $MENDLineIndex = $LineIndex;
      last LINE;
    }

    # Identify the property type once; it drives both parsing and generation...
    my $PropertyType =
        ($Line =~ /^M CHG/i) ? 'Charge'
      : ($Line =~ /^M RAD/i) ? 'Radical'
      : ($Line =~ /^M ISO/i) ? 'Isotope'
      : ($Line =~ /^A /i) ? 'AtomAlias'
      : '';
    next LINE unless $PropertyType;

    my @ValuePairs = ();
    if ($PropertyType eq 'Charge') {
      @ValuePairs = ParseCmpdChargePropertyLine($Line);
    }
    elsif ($PropertyType eq 'Radical') {
      @ValuePairs = ParseCmpdRadicalPropertyLine($Line);
    }
    elsif ($PropertyType eq 'Isotope') {
      @ValuePairs = ParseCmpdIsotopePropertyLine($Line);
    }
    else {
      # Atom alias value is on the next line; consume it as well...
      my $NextLine = $CmpdLines->[++$LineIndex];
      @ValuePairs = ParseCmpdAtomAliasPropertyLine($Line, $NextLine);
    }
    next LINE unless @ValuePairs;

    # Collect values for valid atom numbers with mapping to new atom numbers...
    my @NewValuePairs = ();
    PAIR: for (my $ValuePairIndex = 0; $ValuePairIndex < $#ValuePairs; $ValuePairIndex += 2) {
      my($AtomNum, $Value) = @ValuePairs[$ValuePairIndex, $ValuePairIndex + 1];
      next PAIR unless exists $NewAtomNumOf{$AtomNum};
      push @NewValuePairs, ($NewAtomNumOf{$AtomNum}, $Value);
    }
    next LINE unless @NewValuePairs;

    my @NewPropertyLines = ();
    if ($PropertyType eq 'Charge') {
      @NewPropertyLines = GenerateCmpdChargePropertyLines(\@NewValuePairs);
    }
    elsif ($PropertyType eq 'Radical') {
      @NewPropertyLines = GenerateCmpdRadicalPropertyLines(\@NewValuePairs);
    }
    elsif ($PropertyType eq 'Isotope') {
      @NewPropertyLines = GenerateCmpdIsotopePropertyLines(\@NewValuePairs);
    }
    else {
      @NewPropertyLines = GenerateCmpdAtomAliasPropertyLines(\@NewValuePairs);
    }
    push @WashedCmpdLines, @NewPropertyLines;
  }

  # Retrieve rest of the data label and value property data...
  push @WashedCmpdLines, @{$CmpdLines}[(1 + $MENDLineIndex) .. $#{$CmpdLines}];

  # Update atom and bond count line...
  $WashedCmpdLines[3] = GenerateCmpdCountsLine($NewAtomCount, $NewBondCount, $ChiralFlag);

  return ($FragmentCount, $Fragments, join("\n", @WashedCmpdLines));
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1324
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1325 1;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1326
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1327 __END__
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1328
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1329 =head1 NAME
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1330
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1331 SDFileUtil
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1332
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1333 =head1 SYNOPSIS
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1334
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1335 use SDFileUtil ;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1336
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1337 use SDFileUtil qw(:all);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1338
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1339 =head1 DESCRIPTION
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1340
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1341 B<SDFileUtil> module provides the following functions:
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1342
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1343 GenerateCmpdAtomAliasPropertyLines, GenerateCmpdAtomLine, GenerateCmpdBondLine,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1344 GenerateCmpdChargePropertyLines, GenerateCmpdCommentsLine, GenerateCmpdCountsLine,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1345 GenerateCmpdDataHeaderLabelsAndValuesLines, GenerateCmpdIsotopePropertyLines,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1346 GenerateCmpdMiscInfoLine, GenerateCmpdMolNameLine,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1347 GenerateCmpdRadicalPropertyLines, GenerateEmptyCtabBlockLines,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1348 GenerateMiscLineDateStamp, GetAllAndCommonCmpdDataHeaderLabels,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1349 GetCmpdDataHeaderLabels, GetCmpdDataHeaderLabelsAndValues, GetCmpdFragments,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1350 GetCtabLinesCount, GetInvalidAtomNumbers, GetUnknownAtoms,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1351 InternalBondOrderToMDLBondType, InternalBondStereochemistryToMDLBondStereo,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1352 InternalChargeToMDLCharge, InternalSpinMultiplicityToMDLRadical, IsCmpd2D,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1353 IsCmpd3D, MDLBondStereoToInternalBondStereochemistry,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1354 MDLBondTypeToInternalBondOrder, MDLChargeToInternalCharge,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1355 MDLRadicalToInternalSpinMultiplicity, ParseCmpdAtomAliasPropertyLine,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1356 ParseCmpdAtomLine, ParseCmpdBondLine, ParseCmpdChargePropertyLine,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1357 ParseCmpdCommentsLine, ParseCmpdCountsLine, ParseCmpdIsotopePropertyLine,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1358 ParseCmpdMiscInfoLine, ParseCmpdMolNameLine, ParseCmpdRadicalPropertyLine,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1359 ReadCmpdString, RemoveCmpdDataHeaderLabelAndValue, WashCmpd
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1360
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1361 =head1 METHODS
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1362
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1363 =over 4
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1364
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1365 =item B<GenerateCmpdAtomAliasPropertyLines>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1366
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1367 @Lines = GenerateCmpdAtomAliasPropertyLines($AliasValuePairsRef);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1368
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1369 Returns formatted atom alias property lines corresponding to successive pairs
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1370 of atom number and alias values specified by a reference to an array. Two lines
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1371 are generated for each atom number and alias value pair: First line - A <AtomNum>;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1372 Second line:<AtomAlias>.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1373
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1374 =item B<GenerateCmpdAtomLine>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1375
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1376 $Line = GenerateCmpdAtomLine($AtomSymbol, $AtomX, $AtomY,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1377 $AtomZ, [$MassDifference, $Charge, $StereoParity]);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1378
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1379 Returns a formatted atom data line containing all the input values.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1380
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1381 =item B<GenerateCmpdBondLine>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1382
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1383 $Line = GenerateCmpdBondLine($FirstAtomNum, $SecondAtomNum,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1384 $BondType, [$BondStereo]);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1385
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1386 Returns a formatted bond data line containing all the input values.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1387
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1388 =item B<GenerateCmpdChargePropertyLines>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1389
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1390 @Lines = GenerateCmpdChargePropertyLines($ChargeValuePairsRef);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1391
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1392 Returns formatted M CHG property lines corresponding to successive pairs of
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1393 atom number and charge values specified by a reference to an array.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1394
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1395 =item B<GenerateCmpdCommentsLine>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1396
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1397 $Line = GenerateCmpdCommentsLine($Comments);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1398
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1399 Returns a formatted comments data line.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1400
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1401 =item B<GenerateCmpdCountsLine>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1402
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1403 $Line = GenerateCmpdCountsLine($AtomCount, $BondCount,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1404 $ChiralFlag, [$PropertyCount, $Version]);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1405
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1406 Returns a formatted line containing all the input values. The default values of 999
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1407 and V2000 are used for I<PropertyCount> and I<Version>.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1408
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1409 =item B<GenerateCmpdDataHeaderLabelsAndValuesLines>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1410
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1411 @Lines = GenerateCmpdDataHeaderLabelsAndValuesLines(
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1412 $DataHeaderLabelsRef, $DataHeaderLabelsAndValuesRef,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1413 [$SortDataLabels]);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1414
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1415 Returns formatted data lines containing header label and values lines corresponding to
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1416 all data header labels in array reference I<DataHeaderLabelsRef> with values in hash
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1417 reference I<DataHeaderLabelsAndValuesRef>. By default, data header labels are
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1418 not sorted and correspond to the label order in array reference I<DataHeaderLabelsRef>.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1419
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1420 =item B<GenerateCmpdIsotopePropertyLines>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1421
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1422 @Lines = GenerateCmpdIsotopePropertyLines($IsotopeValuePairsRef);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1423
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1424 Returns formatted M ISO property lines corresponding to successive pairs of
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1425 atom number and isotope values specified by a reference to an array.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1426
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1427 =item B<GenerateCmpdMiscInfoLine>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1428
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1429 $Line = GenerateCmpdMiscInfoLine([$ProgramName, $UserInitial,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1430 $Code]);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1431
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1432 Returns a formatted line containing specified user initial, program name, date and code.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1433 Default values are: I<ProgramName - MayaChem; UserInitial - NULL; Code - 2D>.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1434
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1435 =item B<GenerateCmpdMolNameLine>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1436
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1437 $Line = GenerateCmpdMolNameLine($MolName);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1438
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1439 Returns a formatted molecule name data line.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1440
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1441 =item B<GenerateCmpdRadicalPropertyLines>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1442
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1443 @Lines = GenerateCmpdRadicalPropertyLines($RadicalValuePairsRef);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1444
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1445 Returns formatted M RAD property lines corresponding to successive pairs of
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1446 atom number and multiplicity values specified by a reference to an array.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1447
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1448 =item B<GenerateEmptyCtabBlockLines>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1449
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1450 $Lines = GenerateEmptyCtabBlockLines([$Date]);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1451
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1452 Returns formatted lines representing empty CTAB block.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1453
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1454 =item B<GenerateMiscLineDateStamp>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1455
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1456 $Line = GenerateMiscLineDateStamp();
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1457
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1458 Returns date stamp for misc line.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1459
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1460 =item B<GetAllAndCommonCmpdDataHeaderLabels>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1461
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1462 ($CmpdCount, $DataFieldLabelsArrayRef,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1463 $CommonDataFieldLabelsArrayRef) =
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1464 GetAllAndCommonCmpdDataHeaderLabels(\*SDFILE);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1465
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1466 Returns number of compounds, a reference to an array containing all unique data header
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1467 labels and a reference to an array containing common data field labels for all compounds
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1468 in SD file.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1469
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1470 =item B<GetCmpdDataHeaderLabels>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1471
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1472 (@Labels) = GetCmpdDataHeaderLabels(\@CmpdLines);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1473
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1474 Returns an array containing data header labels for a compound.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1475
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1476 =item B<GetCmpdDataHeaderLabelsAndValues>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1477
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1478 (%DataValues) = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1479
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1480 Returns a hash containing data header labels and values for a compound.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1481
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1482 =item B<GetCmpdFragments>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1483
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1484 ($FragmentCount, $FragmentString) = GetCmpdFragments(\@CmpdLines);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1485
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1486 Figures out the number of disconnected fragments and returns their values along
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1487 with the atom numbers in a string delimited by new line character. Fragment data
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1488 in B<FragmentString> is sorted based on its size.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1489
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1490 =item B<GetCtabLinesCount>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1491
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1492 $CtabLinesCount = GetCtabLinesCount(\@CmpdLines);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1493
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1494 Returns number of lines present between the 4th line and the line containing "M END".
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1495
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1496 =item B<GetInvalidAtomNumbers>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1497
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1498 ($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines) =
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1499 GetInvalidAtomNumbers(\@CmpdLines);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1500
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1501 Returns a list of values containing information about invalid atom numbers present
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1502 in block or atom property lines.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1503
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1504 =item B<GetUnknownAtoms>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1505
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1506 ($UnknownAtomCount, $UnknownAtoms, $UnknownAtomLines) =
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1507 GetUnknownAtoms(\@CmpdLines);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1508
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1509 Returns a list of values containing information about atoms which contain special element
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1510 symbols not present in the periodic table.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1511
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1512 =item B<InternalBondOrderToMDLBondType>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1513
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1514 $MDLBondType = InternalBondOrderToMDLBondType($InternalBondOrder);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1515
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1516 Returns value of I<MDLBondType> corresponding to I<InternalBondOrder>.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1517
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1518 InternalBondOrder MDLBondType
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1519
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1520 1 1
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1521 2 2
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1522 3 3
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1523 1.5 4
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1524
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1525 =item B<InternalBondStereochemistryToMDLBondStereo>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1526
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1527 $MDLBondStereo = InternalBondStereochemistryToMDLBondStereo(
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1528 $InternalBondStereo);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1529
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1530 Returns value of I<MDLBondStereo> corresponding to I<InternalBondStereo> using following
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1531 mapping:
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1532
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1533 InternalBondStereo MDLBondStereo
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1534
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1535 Up 1
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1536 UpOrDown 4
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1537 Down 6
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1538 CisOrTrans 3
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1539 Other 0
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1540
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1541 =item B<InternalChargeToMDLCharge>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1542
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1543 $MDLCharge = InternalChargeToMDLCharge($InternalCharge);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1544
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1545 Returns value of I<MDLCharge> corresponding to I<InternalCharge> using following
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1546 mapping:
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1547
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1548 InternalCharge MDLCharge
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1549
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1550 3 1
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1551 2 2
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1552 1 3
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1553 -1 5
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1554 -2 6
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1555 -3 7
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1556
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1557 =item B<InternalSpinMultiplicityToMDLRadical>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1558
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1559 $MDLRadical = InternalSpinMultiplicityToMDLRadical(
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1560 $InternalSpinMultiplicity);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1561
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1562 Returns value of I<MDLRadical> corresponding to I<InternalSpinMultiplicity>. These
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1563 values are equivalent.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1564
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1565 =item B<MDLBondStereoToInternalBondType>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1566
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1567 $InternalBondType = MDLBondStereoToInternalBondType($MDLBondStereo);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1568
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1569 Returns value of I<InternalBondType> corresponding to I<MDLBondStereo> using
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1570 mapping shown for B<InternalBondTypeToMDLBondStereo> function.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1571
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1572 =item B<IsCmpd2D>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1573
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1574 $Status = IsCmpd2D();
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1575
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1576 Returns 1 or 0 based on whether z-coordinate of any atom is non-zero.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1577
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1578 =item B<IsCmpd3D>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1579
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1580 $Status = IsCmpd3D();
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1581
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1582 Returns 1 or 0 based on whether z-coordinate of any atom is non-zero.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1583
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1584 =item B<MDLBondStereoToInternalBondStereochemistry>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1585
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1586 $InternalBondStereo = MDLBondStereoToInternalBondStereochemistry(
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1587 $MDLBondStereo);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1588
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1589 Returns value of I<InternalBondStereo> corresponding to I<MDLBondStereo> using
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1590 mapping shown for B<InternalBondStereochemistryToMDLBondStereo> function.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1591
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1592 =item B<MDLBondTypeToInternalBondOrder>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1593
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1594 $InternalBondOrder = MDLBondTypeToInternalBondOrder($MDLBondType);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1595
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1596 Returns value of I<InternalBondOrder> corresponding to I<MDLBondType> using
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1597 mapping shown for B<InternalBondOrderToMDLBondType> function.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1598
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1599 =item B<MDLChargeToInternalCharge>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1600
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1601 $InternalCharge = MDLChargeToInternalCharge($MDLCharge);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1602
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1603 Returns value of I<InternalCharge> corresponding to I<MDLCharge> using
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1604 mapping shown for B<InternalChargeToMDLCharge> function.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1605
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1606 =item B<MDLRadicalToInternalSpinMultiplicity>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1607
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1608 $InternalSpinMultiplicity = MDLRadicalToInternalSpinMultiplicity(
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1609 $MDLRadical);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1610
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1611 Returns value of I<InternalSpinMultiplicity> corresponding to I<MDLRadical>. These
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1612 values are equivalent.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1613
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1614 =item B<ParseCmpdAtomAliasPropertyLine>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1615
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1616 @AtomNumAndValuePairs = ParseCmpdAtomAliasPropertyLine(
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1617 $CurrentLine, $NextLine);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1618
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1619 Parses atom alias property lines in CTAB generic properties block and returns an array
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1620 with successive pairs of values corresponding to atom number and its alias.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1621
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1622 =item B<ParseCmpdAtomLine>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1623
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1624 ($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1625 $StereoParity) = ParseCmpdAtomLine($AtomDataLine);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1626
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1627 Parses compound data line containing atom information and returns a list
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1628 of values.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1629
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1630 =item B<ParseCmpdBondLine>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1631
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1632 ($FirstAtomNum, $SecondAtomNum, $BondType) =
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1633 ParseCmpdBondLine($BondDataLine);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1634
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1635 Parses compound data line containing bond information and returns a list of
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1636 values.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1637
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1638 =item B<ParseCmpdCommentsLine>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1639
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1640 $Comments = ParseCmpdCommentsLine($CommentsDataLine);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1641
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1642 Returns the comment string.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1643
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1644 =item B<ParseCmpdChargePropertyLine>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1645
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1646 @AtomNumAndValuePairs = ParseCmpdChargePropertyLine(
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1647 $ChargeDataLine);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1648
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1649 Parses charge property line in CTAB generic properties block and returns an array
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1650 with successive pairs of values corresponding to atom number and its charge.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1651
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1652 =item B<ParseCmpdCountsLine>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1653
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1654 ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version) =
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1655 ParseCmpdCountsLine(\@CountDataLines);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1656
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1657 Returns a list of values containing count information.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1658
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1659 =item B<ParseCmpdMiscInfoLine>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1660
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1661 ($UserInitial, $ProgramName, $Date, $Code, $ScalingFactor1, $ScalingFactor2,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1662 $Energy, $RegistryNum) = ParseCmpdMiscInfoLine($Line);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1663
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1664 Returns a list of values containing miscellaneous information.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1665
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1666 =item B<ParseCmpdIsotopePropertyLine>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1667
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1668 @AtomNumAndValuePairs = ParseCmpdIsotopePropertyLine(
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1669 $IsotopeDataLine);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1670
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1671 Parses isotopic property line in CTAB generic properties block and returns an array
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1672 with successive pairs of values corresponding to atom number and absolute mass of
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1673 atom isotope.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1674
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1675 =item B<ParseCmpdMolNameLine>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1676
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1677 $MolName = ParseCmpdMolNameLine($Line);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1678
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1679 Returns a string containing molecule name.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1680
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1681 =item B<ParseCmpdRadicalPropertyLine>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1682
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1683 @AtomNumAndValuePairs = ParseCmpdRadicalPropertyLine(
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1684 $RadicalDataLine);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1685
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1686 Parses radical property line in CTAB generic properties block and returns an array
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1687 with successive pairs of values corresponding to atom number and radical number
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1688 value.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1689
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1690 =item B<RemoveCmpdDataHeaderLabelAndValue>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1691
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1692 $NewCmpdString = RemoveCmpdDataHeaderLabelAndValue($CmpdString,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1693 $DataHeaderLabel);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1694
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1695 Returns a B<NewCmpdString> after removing I<DataHeaderLabel> along with its
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1696 value from I<CmpdString>.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1697
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1698 =item B<ReadCmpdString>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1699
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1700 $CmpdString = ReadCmpdString(\*SDFILEHANDLE);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1701
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1702 Returns a string containing all the data lines for the next available compound
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1703 in an already open file indicated by SDFILEHANDLE. A NULL string is returned
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1704 on EOF.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1705
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1706 =item B<WashCmpd>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1707
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1708 ($FragmentCount, $Fragments, $WashedCmpdString) =
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1709 WashCmpd(\@CmpdLines);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1710
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1711 Figures out the number of disconnected fragments and returns their values along
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1712 with the atom numbers in a string delimited by new line character. Fragment data
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1713 in B<FragmentString> is sorted based on its size.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1714
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1715 =back
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1716
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1717 =head1 AUTHOR
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1718
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1719 Manish Sud <msud@san.rr.com>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1720
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1721 =head1 SEE ALSO
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1722
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1723 TextUtil.pm
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1724
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1725 =head1 COPYRIGHT
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1726
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1727 Copyright (C) 2015 Manish Sud. All rights reserved.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1728
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1729 This file is part of MayaChemTools.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1730
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1731 MayaChemTools is free software; you can redistribute it and/or modify it under
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1732 the terms of the GNU Lesser General Public License as published by the Free
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1733 Software Foundation; either version 3 of the License, or (at your option)
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1734 any later version.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1735
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1736 =cut