annotate mayachemtool/mayachemtools/lib/SDFileUtil.pm @ 0:a4a2ad5a214e draft default tip

Uploaded
author deepakjadmin
date Thu, 05 Nov 2015 02:37:56 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1 package SDFileUtil;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
2 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
3 # $RCSfile: SDFileUtil.pm,v $
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
4 # $Date: 2015/02/28 20:47:18 $
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
5 # $Revision: 1.49 $
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
6 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
7 # Author: Manish Sud <msud@san.rr.com>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
8 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
10 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
11 # This file is part of MayaChemTools.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
12 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
14 # the terms of the GNU Lesser General Public License as published by the Free
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
15 # Software Foundation; either version 3 of the License, or (at your option) any
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
16 # later version.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
17 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
18 # MayaChemTools is distributed in the hope that it will be useful, but without
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
19 # any warranty; without even the implied warranty of merchantability or fitness
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
20 # for a particular purpose. See the GNU Lesser General Public License for more
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
21 # details.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
22 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
23 # You should have received a copy of the GNU Lesser General Public License
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
26 # Boston, MA, 02111-1307, USA.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
27 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
28
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
29 use strict;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
30 use Exporter;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
31 use Carp;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
32 use PeriodicTable qw(IsElement);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
33 use TimeUtil ();
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
34
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
35 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
36
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
37 @ISA = qw(Exporter);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
38 @EXPORT = qw(GenerateCmpdAtomLine GenerateCmpdBondLine GenerateCmpdChargePropertyLines GenerateCmpdCommentsLine GenerateCmpdCountsLine GenerateCmpdAtomAliasPropertyLines GenerateCmpdIsotopePropertyLines GenerateCmpdDataHeaderLabelsAndValuesLines GenerateCmpdMiscInfoLine GenerateCmpdRadicalPropertyLines GenerateCmpdMolNameLine GenerateEmptyCtabBlockLines GenerateMiscLineDateStamp GetAllAndCommonCmpdDataHeaderLabels GetCmpdDataHeaderLabels GetCmpdDataHeaderLabelsAndValues GetCmpdFragments GetCtabLinesCount GetUnknownAtoms GetInvalidAtomNumbers MDLChargeToInternalCharge InternalChargeToMDLCharge MDLBondTypeToInternalBondOrder InternalBondOrderToMDLBondType MDLBondStereoToInternalBondStereochemistry InternalBondStereochemistryToMDLBondStereo InternalSpinMultiplicityToMDLRadical MDLRadicalToInternalSpinMultiplicity IsCmpd3D IsCmpd2D ParseCmpdAtomLine ParseCmpdBondLine ParseCmpdCommentsLine ParseCmpdCountsLine ParseCmpdMiscInfoLine ParseCmpdMolNameLine ParseCmpdAtomAliasPropertyLine ParseCmpdChargePropertyLine ParseCmpdIsotopePropertyLine ParseCmpdRadicalPropertyLine ReadCmpdString RemoveCmpdDataHeaderLabelAndValue WashCmpd);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
39 @EXPORT_OK = qw();
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
40 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
41
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Format the counts line of a V2000 connection table (CTAB) block.
#
# Call forms, selected by the number of arguments:
#
#   GenerateCmpdCountsLine($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version)
#   GenerateCmpdCountsLine($AtomCount, $BondCount, $ChiralFlag)
#   GenerateCmpdCountsLine($AtomCount, $BondCount)
#
# Any argument count other than five or three is handled like the two
# argument form: only the first two values are used, the chiral flag is 0,
# the property count is 999 and the version is "V2000".
#
# Returns the formatted line without a trailing newline.
#
# Croaks when the atom or the bond count is larger than 999: each count is
# written into a three character column, so a bigger value would shift every
# following field of the line.
#
sub GenerateCmpdCountsLine {
  my($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version, $Line);

  if (@_ == 5) {
    ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version) = @_;
  }
  elsif (@_ == 3) {
    ($AtomCount, $BondCount, $ChiralFlag) = @_;
    $PropertyCount = 999;
    $Version = "V2000";
  }
  else {
    ($AtomCount, $BondCount) = @_;
    $ChiralFlag = 0;
    $PropertyCount = 999;
    $Version = "V2000";
  }
  if ($AtomCount > 999) {
    croak "Error: SDFileUtil::GenerateCmpdCountsLine: The atom count, $AtomCount, exceeds maximum of 999 allowed for CTAB version 2000. The Extended Connection Table (V3000) format in MDL MOL and SD files is not supported by the current release of MayaChemTools...";
  }
  # The bond count lives in a three character column as well and needs the
  # same guard as the atom count.
  if ($BondCount > 999) {
    croak "Error: SDFileUtil::GenerateCmpdCountsLine: The bond count, $BondCount, exceeds maximum of 999 allowed for CTAB version 2000. The Extended Connection Table (V3000) format in MDL MOL and SD files is not supported by the current release of MayaChemTools...";
  }

  # Eleven three character integer columns followed by the six character
  # version column; the columns not supplied by the caller are written as 0.
  $Line = sprintf "%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i%6s", $AtomCount, $BondCount, 0, 0, $ChiralFlag, 0, 0, 0, 0, 0, $PropertyCount, $Version;

  return ($Line);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
67
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Build the comments line of a compound record.
#
# The text is handed back unchanged unless it is longer than 80 characters,
# in which case only its first 80 characters are returned.
#
sub GenerateCmpdCommentsLine {
  my($Comments) = @_;

  if (length($Comments) > 80) {
    return substr($Comments, 0, 80);
  }
  return $Comments;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
77
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Build the molecule name line of a compound record.
#
# The name is handed back unchanged unless it is longer than 80 characters,
# in which case only its first 80 characters are returned.
#
sub GenerateCmpdMolNameLine {
  my($MolName) = @_;

  if (length($MolName) > 80) {
    return substr($MolName, 0, 80);
  }
  return $MolName;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
87
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Generate the misc info line (second header line) of a compound record.
#
# Parameters, all optional; an undefined or false value selects the default:
#
#   $ProgramName - program name, at most 8 characters are used [ MayaChem ]
#   $UserInitial - user initials, at most 2 characters are used [ two blanks ]
#   $Code        - dimensional code, at most 2 characters are used [ 2D ]
#
# Returns the line without a trailing newline, laid out as user initials
# (2 columns), program name (8 columns), date stamp and dimensional code.
#
# The line is column oriented: initials and program name are blank padded
# to their full width so that the date stamp always starts in the same
# column, whatever the length of the supplied values.
#
sub GenerateCmpdMiscInfoLine {
  my($ProgramName, $UserInitial, $Code) = @_;
  my($Date, $Line);

  if (!(defined($ProgramName) && $ProgramName)) {
    $ProgramName = "MayaChem";
  }
  if (!(defined($UserInitial) && $UserInitial)) {
    # The initials field is two columns wide.
    $UserInitial = "  ";
  }
  if (!(defined($Code) && $Code)) {
    $Code = "2D";
  }

  # NOTE(review): the stamp comes from TimeUtil::SDFileTimeStamp() and is
  # inserted as is; it is presumably fixed width - confirm against TimeUtil.
  $Date = GenerateMiscLineDateStamp();

  # "%-W.Ws" both truncates to and left justifies within W columns; the
  # trailing code is truncated only, so no trailing blanks are appended.
  $Line = sprintf "%-2.2s%-8.8s%s%.2s", $UserInitial, $ProgramName, $Date, $Code;

  return $Line;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
118
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Generate the lines of an empty CTAB block, i.e. a structure with no atoms
# and no bonds.
#
# Parameters:
#
#   $Date - optional date stamp for the misc info line; when the sub is not
#           called with exactly one argument the stamp is taken from
#           GenerateMiscLineDateStamp().
#
# Returns one string holding five newline separated lines; the last line
# carries no trailing newline.
#
sub GenerateEmptyCtabBlockLines {
  my($Date, $Lines);

  if (@_ == 1) {
    ($Date) = @_;
  }
  else {
    $Date = GenerateMiscLineDateStamp();
  }
  # First line: Blank molname line...
  # Second line: Misc info...
  # Third line: Blank comments line...
  # Fourth line: Counts line reflecting empty structure data block...
  # Fifth line: End of the properties block...
  $Lines = "\n";
  # Two blanks stand in for the two column user initials field.
  $Lines .= "  MayaChem${Date}2D\n";
  $Lines .= "\n";
  $Lines .= GenerateCmpdCountsLine(0, 0, 0) . "\n";
  # The terminator is "M", two blanks, "END".
  $Lines .= "M  END";

  return $Lines;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
141
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Generate the date stamp used on the misc info line of an SD file record.
#
# Thin wrapper: the value is whatever TimeUtil::SDFileTimeStamp() returns.
#
sub GenerateMiscLineDateStamp {
  return TimeUtil::SDFileTimeStamp();
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
146
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Generate an atom line of a V2000 CTAB atom block.
#
# Parameters:
#
#   $AtomSymbol            - atom symbol, left justified in 3 columns
#   $AtomX, $AtomY, $AtomZ - coordinates, each written as %10.4f
#   $MassDifference        - optional, 2 columns [ 0 ]
#   $Charge                - optional, 3 columns [ 0 ]
#   $StereoParity          - optional, 3 columns [ 0 ]
#
# Returns the formatted line without a trailing newline.
#
# Line layout, every field after the symbol being an integer column:
#
#   xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz aaaddcccssshhhbbbvvvHHHrrriiimmmnnneee
#
sub GenerateCmpdAtomLine {
  my($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity) = @_;
  my($Line);

  if (!defined $MassDifference) {
    $MassDifference = 0;
  }
  if (!defined $Charge) {
    $Charge = 0;
  }
  if (!defined $StereoParity) {
    $StereoParity = 0;
  }

  # The nine fields following the stereo parity (hhh through eee) are not
  # set by this sub; each is written as a right justified 0 in a three
  # character column.
  $Line = sprintf "%10.4f%10.4f%10.4f %-3s%2i%3i%3i  0  0  0  0  0  0  0  0  0", $AtomX, $AtomY, $AtomZ, $AtomSymbol, $MassDifference, $Charge, $StereoParity;

  return $Line;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
166
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Generate a bond line of a V2000 CTAB bond block.
#
# Parameters:
#
#   $FirstAtomNum  - number of the first atom, 3 columns
#   $SecondAtomNum - number of the second atom, 3 columns
#   $BondType      - bond type, 3 columns
#   $BondStereo    - optional bond stereo, 3 columns [ 0 ]
#
# Returns the formatted line without a trailing newline.
#
# Line layout, every field being a three character integer column:
#
#   111222tttsssxxxrrrccc
#
sub GenerateCmpdBondLine {
  my($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo) = @_;
  my($Line);

  if (!defined $BondStereo) {
    $BondStereo = 0;
  }

  # The three fields following the bond stereo (xxx, rrr and ccc) are not
  # set by this sub; each is written as a right justified 0 in a three
  # character column.
  $Line = sprintf "%3i%3i%3i%3i  0  0  0", $FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo;

  return $Line;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
180
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Generate the charge property lines of a CTAB block.
#
# The reference passed in is forwarded, together with the property name
# 'Charge', to _GenerateCmpdGenericPropertyLines; whatever that sub returns
# is returned to the caller.
#
sub GenerateCmpdChargePropertyLines {
  my $ChargeValuePairsRef = shift;

  return _GenerateCmpdGenericPropertyLines('Charge', $ChargeValuePairsRef);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
188
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Generate the isotope property lines of a CTAB block.
#
# The reference passed in is forwarded, together with the property name
# 'Isotope', to _GenerateCmpdGenericPropertyLines; whatever that sub returns
# is returned to the caller.
#
sub GenerateCmpdIsotopePropertyLines {
  my $IsotopeValuePairsRef = shift;

  return _GenerateCmpdGenericPropertyLines('Isotope', $IsotopeValuePairsRef);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
196
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Generate the radical property lines of a CTAB block.
#
# The reference passed in is forwarded, together with the property name
# 'Radical', to _GenerateCmpdGenericPropertyLines; whatever that sub returns
# is returned to the caller.
#
sub GenerateCmpdRadicalPropertyLines {
  my $RadicalValuePairsRef = shift;

  return _GenerateCmpdGenericPropertyLines('Radical', $RadicalValuePairsRef);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
204
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Generate the atom alias property lines of a CTAB block.
#
# Atom alias property format, two lines per alias:
#
# A  aaa
# x...
#
#    aaa: Atom number, right justified in 3 columns
#      x: Atom alias text on the following line
#
# Parameters:
#
#   $PropertyValuePairsRef - reference to a flat list of
#                            (atom number, alias) pairs
#
# Returns a list holding two lines for every complete pair; a trailing
# atom number without an alias is ignored.
#
sub GenerateCmpdAtomAliasPropertyLines {
  my($PropertyValuePairsRef) = @_;
  my($Index, $AtomNum, $AtomAlias, $Line, @PropertyLines);

  @PropertyLines = ();

  # Stop before the last index so that $Index + 1 always addresses an
  # existing element.
  for ($Index = 0; $Index < $#{$PropertyValuePairsRef}; $Index += 2) {
    $AtomNum = $PropertyValuePairsRef->[$Index];
    $AtomAlias = $PropertyValuePairsRef->[$Index + 1];

    # "A" and two blanks precede the three column atom number.
    $Line = "A  " . sprintf "%3i", $AtomNum;

    push @PropertyLines, $Line;
    push @PropertyLines, $AtomAlias;
  }

  return @PropertyLines;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
233
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Generate the data field lines of a compound record.
#
# Parameters:
#
#   $DataHeaderLabelsRef          - reference to the list of labels to write
#   $DataHeaderLabelsAndValuesRef - reference to a hash mapping label to value
#   $SortDataLabels               - optional; when true the labels are written
#                                   in sorted order, otherwise in the order
#                                   given [ false ]
#
# Returns a list holding three lines per label: the header line, the value
# line (empty when the hash has no entry for the label) and a blank line.
#
sub GenerateCmpdDataHeaderLabelsAndValuesLines {
  my($DataHeaderLabelsRef, $DataHeaderLabelsAndValuesRef, $SortDataLabels) = @_;
  my(@OrderedLabels, @DataLines);

  # Work on a copy so that sorting never touches the caller's list.
  @OrderedLabels = @{$DataHeaderLabelsRef};
  if ($SortDataLabels) {
    @OrderedLabels = sort @OrderedLabels;
  }

  @DataLines = ();
  foreach my $Name (@OrderedLabels) {
    my $Text = '';
    if (exists $DataHeaderLabelsAndValuesRef->{$Name}) {
      $Text = $DataHeaderLabelsAndValuesRef->{$Name};
    }
    push @DataLines, "> <${Name}>", "$Text", "";
  }

  return @DataLines;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
261
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Scan the compounds of an SD file and collect their data field labels.
#
# Parameters:
#
#   $SDFileRef - file reference handed to ReadCmpdString() to fetch one
#                compound string at a time until it returns a false value
#
# Returns a three element list:
#
#   $CmpdCount             - number of compounds read
#   \@DataFieldLabels      - every label seen, in order of first appearance
#   \@CommonDataFieldLabels - the labels present in every compound
#
# A label is common only when it appears in the first compound and in each
# one after it. The first compound is recognised by the compound count and
# not by the label list being empty: otherwise a file whose leading
# compounds carry no data fields would report the labels of a later
# compound as common.
#
sub GetAllAndCommonCmpdDataHeaderLabels {
  my($SDFileRef) = @_;
  my($CmpdCount, $CmpdString, @CmpdLines, @CmpdDataFieldLabels, @DataFieldLabels, @CommonDataFieldLabels, %CmpdDataFieldLabelsMap, %DataFieldLabelsMap);

  $CmpdCount = 0;
  @DataFieldLabels = ();
  @CommonDataFieldLabels = ();
  %DataFieldLabelsMap = ();

  CMPD: while ($CmpdString = ReadCmpdString($SDFileRef)) {
    $CmpdCount++;
    @CmpdLines = split "\n", $CmpdString;
    @CmpdDataFieldLabels = GetCmpdDataHeaderLabels(\@CmpdLines);

    if ($CmpdCount == 1) {
      # Initial label set: everything is common until a later compound
      # turns out to lack it. A label repeated within the compound is
      # recorded once.
      for my $Label (@CmpdDataFieldLabels) {
        if (!exists $DataFieldLabelsMap{$Label}) {
          push @DataFieldLabels, $Label;
          $DataFieldLabelsMap{$Label} = "PresentInAll";
        }
      }
      next CMPD;
    }

    # Setup a map for the labels of the current compound...
    %CmpdDataFieldLabelsMap = ();
    for my $Label (@CmpdDataFieldLabels) {
      $CmpdDataFieldLabelsMap{$Label} = "PresentInSome";
    }
    # A known label missing from this compound is no longer common...
    for my $Label (@DataFieldLabels) {
      if (!exists $CmpdDataFieldLabelsMap{$Label}) {
        $DataFieldLabelsMap{$Label} = "PresentInSome";
      }
    }
    # A label first seen now was missing from an earlier compound, so it is
    # added as not common...
    for my $Label (@CmpdDataFieldLabels) {
      if (!exists $DataFieldLabelsMap{$Label}) {
        push @DataFieldLabels, $Label;
        $DataFieldLabelsMap{$Label} = "PresentInSome";
      }
    }
  }

  # Identify the common data field labels...
  for my $Label (@DataFieldLabels) {
    if ($DataFieldLabelsMap{$Label} eq "PresentInAll") {
      push @CommonDataFieldLabels, $Label;
    }
  }

  return ($CmpdCount, \@DataFieldLabels, \@CommonDataFieldLabels);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
317
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
318 # Parse all the data header labels and return them as a list...
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
319 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
320 # Format:
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
321 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
322 #> Data header line
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
323 #Data line(s)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
324 #Blank line
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
325 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
326 # [Data Header] (one line) precedes each item of data, starts with a greater than (>) sign, and
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
327 # contains at least one of the following:
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
328 # The field name enclosed in angle brackets. For example: <melting.point>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
329 # The field number, DTn , where n represents the number assigned to the field in a MACCS-II database
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
330 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
331 #Optional information for the data header includes:
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
332 # The compound’s external and internal registry numbers. External registry numbers must be enclosed in parentheses.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
333 # Any combination of information
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
334 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
335 #The following are examples of valid data headers:
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
336 #> <MELTING.POINT>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
337 #> 55 (MD-08974) <BOILING.POINT> DT12
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
338 #> DT12 55
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
339 #> (MD-0894) <BOILING.POINT> FROM ARCHIVES
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
340 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
341 #Notes: Sometimes last blank line is missing and can be just followed by $$$$
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
342 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
sub GetCmpdDataHeaderLabels {
  # $CmpdLines: reference to the list of lines making up one compound.
  # Returns the list of field names found on its data header lines, in the
  # order the lines appear.
  my($CmpdLines) = @_;
  my(@Labels) = ();

  for my $Line (@{$CmpdLines}) {
    # Only lines starting with a greater than sign are data header lines.
    next unless $Line =~ /^>/;

    # Pick up the first field name enclosed in angle brackets, if any.
    my($Name) = $Line =~ /<(.*?)>/;
    next unless defined $Name;

    # Drop any angle bracket left inside the captured name.
    $Name =~ tr/<>//d;
    push @Labels, $Name;
  }

  return (@Labels);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
362
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Parse all the data header labels and their values.
#
# Lines are scanned up to the first line starting with $$$$. A line starting
# with '>' and containing a <label> starts a new data field; the non-blank
# lines following it, up to the next blank line, make up its value. Multiple
# value lines are joined by a new line character.
#
# Parameters:
#   $CmpdLines - reference to an array containing the lines of a compound.
#
# Returns a hash, as a flat list, mapping each label to its value. A label
# without any value lines maps to an empty string.
#
sub GetCmpdDataHeaderLabelsAndValues {
  my($CmpdLines) = @_;
  my($CmpdLine, $CurrentLabel, $Label, $ValueCount, $ProcessingLabelData, %DataFields);

  %DataFields = ();
  $CurrentLabel = '';
  $ProcessingLabelData = 0;
  $ValueCount = 0;

  CMPDLINE: for $CmpdLine (@$CmpdLines) {
    if ($CmpdLine =~ /^\$\$\$\$/) {
      last CMPDLINE;
    }
    if ($CmpdLine =~ /^>/) {
      # Does the line contain a field name enclosed in angular brackets?
      ($Label) = $CmpdLine =~ /<.*?>/g;
      if (defined $Label) {
        ($CurrentLabel = $Label) =~ s/(<|>)//g;
        $ValueCount = 0;

        # Test the length instead of the truth value: a field named "0" is a
        # valid label and must not be dropped. Only an empty <> is ignored...
        $ProcessingLabelData = length($CurrentLabel) ? 1 : 0;
        if ($ProcessingLabelData) {
          $DataFields{$CurrentLabel} = '';
        }
        next CMPDLINE;
      }
      # A '>' line without any <label>: It's ignored below unless a value is
      # being collected, in which case it's treated as part of that value...
    }
    if (!$ProcessingLabelData) {
      next CMPDLINE;
    }
    if (!(defined($CmpdLine) && length($CmpdLine))) {
      # Blank line terminates value for a label...
      $CurrentLabel = '';
      $ValueCount = 0;
      $ProcessingLabelData = 0;
      next CMPDLINE;
    }
    $ValueCount++;
    if ($ValueCount > 1) {
      $DataFields{$CurrentLabel} .= "\n" . $CmpdLine;
    }
    else {
      $DataFields{$CurrentLabel} = $CmpdLine;
    }
  }
  return (%DataFields);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
419
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Return an updated compound string after removing the specified data header
# label along with its value from the compound string.
#
# Parameters:
#   $CmpdString      - compound lines joined by new line characters.
#   $DataHeaderLabel - field name to remove, without angle brackets. It's
#                      matched literally and case insensitively.
#
# Returns the compound string without the header line for the label and the
# value lines following it up to and including the terminating blank line. A
# $$$$ line ending the value is kept.
#
sub RemoveCmpdDataHeaderLabelAndValue {
  my($CmpdString, $DataHeaderLabel) = @_;
  my($Line, $ProcessingDataHeaderLabel, @CmpdLines);

  @CmpdLines = ();
  $ProcessingDataHeaderLabel = 0;

  CMPDLINE: for $Line (split "\n", $CmpdString) {
    # Quote the label: Field names routinely contain regex metacharacters
    # such as '.', '(' or '+' which must not be interpreted as a pattern...
    if ($Line =~ /^>/ && $Line =~ /<\Q$DataHeaderLabel\E>/i) {
      $ProcessingDataHeaderLabel = 1;
      next CMPDLINE;
    }

    if ($ProcessingDataHeaderLabel) {
      if ($Line =~ /^\$\$\$\$/) {
        # Value ran up to the compound delimiter without a blank line: keep
        # the delimiter...
        push @CmpdLines, $Line;
        $ProcessingDataHeaderLabel = 0;
      }
      elsif (!length($Line)) {
        # Blank line indicates end of the data value...
        $ProcessingDataHeaderLabel = 0;
      }
      next CMPDLINE;
    }

    # Track compound lines not belonging to the removed data field...
    push @CmpdLines, $Line;
  }

  return join "\n", @CmpdLines;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
454
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
#
# Using bond blocks, figure out the number of disconnected fragments and
# return the fragment count along with the atom numbers of each fragment.
#
# Parameters:
#   $CmpdLines - reference to an array containing the lines of a compound.
#                The counts line is read from index 3; bond lines are read
#                from the lines following the atom lines.
#
# Returns ($FragmentCount, $FragmentString). $FragmentString contains one line
# per fragment, delimited by new line character, listing its atom numbers in
# ascending order separated by a space. Fragments are listed largest first;
# fragments of equal size are listed in the order of their lowest atom number.
#
sub GetCmpdFragments {
  my($CmpdLines) = @_;
  my($AtomCount, $BondCount, $FirstAtomNum, $SecondAtomNum, $BondType, $LineIndex, $AtomNum, $NbrAtomNum, $ProcessAtomNum, $ProcessedAtomCount, $FragmentNum, $FragmentCount, $FragmentString, @AtomConnections, @ProcessedAtoms, @ProcessingAtoms, @ConnectedAtoms, @FragmentStrings, %Fragments);

  # Setup the connection table for each atom...
  @AtomConnections = ();
  ($AtomCount, $BondCount) = ParseCmpdCountsLine(@{$CmpdLines}[3]);
  for $AtomNum (1 .. $AtomCount) {
    %{$AtomConnections[$AtomNum]} = ();
  }
  for ($LineIndex = 4 + $AtomCount; $LineIndex < (4 + $AtomCount + $BondCount); $LineIndex++) {
    ($FirstAtomNum, $SecondAtomNum, $BondType) = ParseCmpdBondLine(@{$CmpdLines}[$LineIndex]);

    # Record the bond in both directions; the first bond listed for a pair of
    # atoms wins...
    if (!$AtomConnections[$FirstAtomNum]{$SecondAtomNum}) {
      $AtomConnections[$FirstAtomNum]{$SecondAtomNum} = $BondType;
    }
    if (!$AtomConnections[$SecondAtomNum]{$FirstAtomNum}) {
      $AtomConnections[$SecondAtomNum]{$FirstAtomNum} = $BondType;
    }
  }

  # Get set to count fragments...
  $ProcessedAtomCount = 0;
  $FragmentNum = 0;
  %Fragments = ();
  @ProcessedAtoms = ();
  for $AtomNum (1 .. $AtomCount) {
    $ProcessedAtoms[$AtomNum] = 0;
  }

  while ($ProcessedAtomCount < $AtomCount) {
    @ProcessingAtoms = ();
    @ConnectedAtoms = ();

    # Start a new fragment at the lowest numbered atom not processed so far...
    ATOMNUM: for $AtomNum (1 .. $AtomCount) {
      if (!$ProcessedAtoms[$AtomNum]) {
        $ProcessedAtomCount++;
        $ProcessedAtoms[$AtomNum] = 1;
        push @ProcessingAtoms, $AtomNum;
        $FragmentNum++;
        $Fragments{$FragmentNum} = [$AtomNum];
        last ATOMNUM;
      }
    }

    # Go over the neighbors and follow the connection trail while collecting the
    # atoms numbers present in the connected fragment...
    while (@ProcessingAtoms) {
      for $ProcessAtomNum (@ProcessingAtoms) {
        for $NbrAtomNum (keys %{$AtomConnections[$ProcessAtomNum]}) {
          if (!$ProcessedAtoms[$NbrAtomNum]) {
            $ProcessedAtomCount++;
            $ProcessedAtoms[$NbrAtomNum] = 1;
            push @ConnectedAtoms, $NbrAtomNum;
            push @{ $Fragments{$FragmentNum} }, $NbrAtomNum;
          }
        }
      }
      # Newly reached atoms are the ones to expand next...
      @ProcessingAtoms = @ConnectedAtoms;
      @ConnectedAtoms = ();
    }
  }
  $FragmentCount = $FragmentNum;

  # Sort out the fragments by size. Ties are broken by fragment number, which
  # follows the lowest atom number of the fragment; otherwise the order of
  # equal sized fragments would depend on hash key order and vary between runs...
  @FragmentStrings = ();
  for $FragmentNum (sort { @{$Fragments{$b}} <=> @{$Fragments{$a}} || $a <=> $b } keys %Fragments) {
    # Sort the atoms in a fragment by their numbers...
    push @FragmentStrings, join(" ", sort { $a <=> $b } @{ $Fragments{$FragmentNum} });
  }
  $FragmentString = join "\n", @FragmentStrings;

  return ($FragmentCount, $FragmentString);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
538
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Count the number of lines present in between the 4th line and the first
# subsequent line, such as "M  END", not starting with a space or a digit.
#
# Parameters:
#   $CmpdLines - reference to an array containing the lines of a compound.
#
# Returns the count of lines from index 4 up to, but excluding, the first line
# which doesn't start with a space or a digit. Returns 0 when there is no such
# line.
#
sub GetCtabLinesCount {
  my($CmpdLines) = @_;
  my $TotalLinesCount = @$CmpdLines;

  for my $Index (4 .. $TotalLinesCount - 1) {
    # Any line after atom and bond data starting with anything other than
    # space or a digit indicates end of Ctab atom/bond data block...
    return $Index - 4 if $CmpdLines->[$Index] !~ /^[0-9 ]/;
  }

  # Ran out of lines without finding the end of the block...
  return 0;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
557
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Using atom blocks, find the atoms whose symbols are not recognized as
# elements by IsElement.
#
# Parameters:
#   $CmpdLines - reference to an array containing the lines of a compound.
#                The counts line is read from index 3 and the atom lines
#                start at index 4.
#
# Returns ($UnknownAtomCount, $UnknownAtoms, $UnknownAtomLines): the number of
# unknown atoms, their symbols each preceded by a space, and the corresponding
# atom lines delimited by new line character.
#
sub GetUnknownAtoms {
  my($CmpdLines) = @_;
  my($UnknownAtomCount, $UnknownAtoms, $UnknownAtomLines) = (0, "", "");

  my($AtomCount) = ParseCmpdCountsLine(@{$CmpdLines}[3]);

  my $LineIndex = 4;
  while ($LineIndex < (4 + $AtomCount)) {
    # Atom symbol is the first value returned for an atom line...
    my($AtomSymbol) = ParseCmpdAtomLine(@{$CmpdLines}[$LineIndex]);

    unless (IsElement($AtomSymbol)) {
      $UnknownAtomCount += 1;
      $UnknownAtoms = $UnknownAtoms . " $AtomSymbol";

      # First tracked line starts the string; the rest are appended on new lines...
      $UnknownAtomLines = $UnknownAtomLines
        ? $UnknownAtomLines . "\n" . $CmpdLines->[$LineIndex]
        : $CmpdLines->[$LineIndex];
    }
    $LineIndex++;
  }

  return ($UnknownAtomCount, $UnknownAtoms, $UnknownAtomLines);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
583
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Check z coordinates of all atoms to see whether any of them is non-zero
# which makes the compound geometry three dimensional.
#
# Parameters:
#   $CmpdLines - reference to an array containing the lines of a compound.
#                The counts line is read from index 3 and the atom lines
#                start at index 4.
#
# Returns 1 as soon as an atom with a non-zero z coordinate is found;
# otherwise, returns 0.
#
sub IsCmpd3D {
  my($CmpdLines) = @_;
  my($AtomCount) = ParseCmpdCountsLine(@{$CmpdLines}[3]);

  my $LineIndex = 4;
  while ($LineIndex < (4 + $AtomCount)) {
    # Atom line values are: symbol, x, y, z...
    my(undef, undef, undef, $AtomZ) = ParseCmpdAtomLine(@{$CmpdLines}[$LineIndex]);
    return 1 if $AtomZ != 0;
    $LineIndex++;
  }

  # All z coordinates are zero...
  return 0;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
600
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Check whether it's a 2D compound: any compound not reported as three
# dimensional by IsCmpd3D is considered two dimensional.
#
# Parameters:
#   $CmpdLines - reference to an array containing the lines of a compound.
#
# Returns 1 for a 2D compound; otherwise, returns 0.
#
sub IsCmpd2D {
  my($CmpdLines) = @_;

  if (IsCmpd3D($CmpdLines)) {
    return 0;
  }
  return 1;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
608
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Using bond blocks and atom property lines, find the atom numbers which are
# greater than the atom count specified in the compound counts line.
#
# Parameters:
#   $CmpdLines - reference to an array containing the lines of a compound.
#                The counts line is read from index 3; bond lines follow the
#                atom lines and property lines follow the bond lines.
#
# Returns ($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines):
# the number of invalid atom number references, the invalid atom numbers each
# preceded by a space, and the lines containing them delimited by new line
# character.
#
sub GetInvalidAtomNumbers {
  my($CmpdLines) = @_;
  my($LineIndex, $AtomCount, $BondCount, $FirstAtomNum, $SecondAtomNum, $InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines, $Line, $InvalidAtomPropertyLine, $ValuePairIndex, $AtomNum, @ValuePairs);

  ($AtomCount, $BondCount) = ParseCmpdCountsLine(@{$CmpdLines}[3]);

  $InvalidAtomNumbersCount = 0;
  $InvalidAtomNumbers = "";
  $InvalidAtomNumberLines = "";

  # Go over bond block lines...
  BONDLINE: for ($LineIndex = 4 + $AtomCount; $LineIndex < (4 + $AtomCount + $BondCount); $LineIndex++) {
    ($FirstAtomNum, $SecondAtomNum) = ParseCmpdBondLine(@{$CmpdLines}[$LineIndex]);
    if ($FirstAtomNum <= $AtomCount && $SecondAtomNum <= $AtomCount) {
      next BONDLINE;
    }
    if ($FirstAtomNum > $AtomCount) {
      $InvalidAtomNumbersCount++;
      $InvalidAtomNumbers .= " $FirstAtomNum";
    }
    if ($SecondAtomNum > $AtomCount) {
      $InvalidAtomNumbersCount++;
      $InvalidAtomNumbers .= " $SecondAtomNum";
    }
    if ($InvalidAtomNumberLines) {
      $InvalidAtomNumberLines .= "\n" . $CmpdLines->[$LineIndex];
    }
    else {
      $InvalidAtomNumberLines = $CmpdLines->[$LineIndex];
    }
  }

  # Go over property lines before M  END...
  #
  # Property line tags are matched with \s+ after the leading letter: CTfile
  # writes them with two spaces (e.g. "M  END", "M  CHG"), and a pattern with
  # a single literal space would never match such lines.
  #
  PROPERTYLINE: for ($LineIndex = (4 + $AtomCount + $BondCount); $LineIndex < @$CmpdLines; $LineIndex++) {
    $Line = $CmpdLines->[$LineIndex];
    if ($Line =~ /^M\s+END/i) {
      last PROPERTYLINE;
    }

    # Parsers are expected to return a flat list of atom number and value
    # pairs, as consumed by the loop below...
    @ValuePairs = ();
    if ($Line =~ /^M\s+CHG/i) {
      @ValuePairs = ParseCmpdChargePropertyLine($Line);
    }
    elsif ($Line =~ /^M\s+RAD/i) {
      @ValuePairs = ParseCmpdRadicalPropertyLine($Line);
    }
    elsif ($Line =~ /^M\s+ISO/i) {
      @ValuePairs = ParseCmpdIsotopePropertyLine($Line);
    }
    elsif ($Line =~ /^A\s+/i) {
      # Atom alias takes up two lines: consume the following line as well...
      my($NextLine);
      $LineIndex++;
      $NextLine = $CmpdLines->[$LineIndex];
      @ValuePairs = ParseCmpdAtomAliasPropertyLine($Line, $NextLine);
    }
    else {
      next PROPERTYLINE;
    }

    # Check the atom number in each pair; the value itself is not needed...
    $InvalidAtomPropertyLine = 0;
    for ($ValuePairIndex = 0; $ValuePairIndex < $#ValuePairs; $ValuePairIndex += 2) {
      $AtomNum = $ValuePairs[$ValuePairIndex];
      if ($AtomNum > $AtomCount) {
        $InvalidAtomPropertyLine = 1;
        $InvalidAtomNumbersCount++;
        $InvalidAtomNumbers .= " $AtomNum";
      }
    }
    if ($InvalidAtomPropertyLine) {
      if ($InvalidAtomNumberLines) {
        $InvalidAtomNumberLines .= "\n" . $Line;
      }
      else {
        $InvalidAtomNumberLines = $Line;
      }
    }
  }

  return ($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
692
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
693 # Ctab lines: Atom block
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
694 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
695 # Format: xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz aaaddcccssshhhbbbvvvHHHrrriiimmmnnneee
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
696 # A10 A10 A10 xA3 A2A3 A3 A3 A3 A3 A3 A3 A3 A3 A3 A3
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
697 # x,y,z: Atom coordinates
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
698 # aaa: Atom symbol. Entry in periodic table or L for atom list, A, Q, * for unspecified
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
699 # atom, and LP for lone pair, or R# for Rgroup label
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
700 # dd: Mass difference. -3, -2, -1, 0, 1, 2, 3, 4 (0 for value beyond these limits)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
701 # ccc: Charge. 0 = uncharged or value other than these, 1 = +3, 2 = +2, 3 = +1,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
702 # 4 = doublet radical, 5 = -1, 6 = -2, 7 = -3
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
703 # sss: Atom stereo parity. 0 = not stereo, 1 = odd, 2 = even, 3 = either or unmarked stereo center
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
704 # hhh: Hydrogen count + 1. 1 = H0, 2 = H1, 3 = H2, 4 = H3, 5 = H4
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
705 # bbb: Stereo care box. 0 = ignore stereo configuration of this double bond atom, 1 = stereo
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
706 # configuration of double bond atom must match
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
707 # vvv: Valence. 0 = no marking (default), (1 to 14) = (1 to 14), 15 = zero valence
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
708 # HHH: H0 designator. 0 = not specified, 1 = no H atoms allowed (redundant due to hhh)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
709 # rrr: Not used
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
710 # iii: Not used
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
711 # mmm: Atom-atom mapping number. 1 - number of atoms
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
712 # nnn: Inversion/retention flag. 0 = property not applied, 1 = configuration is inverted,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
713 # 2 = configuration is retained.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
714 # eee: Exact change flag. 0 = property not applied, 1 = change on atom must be
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
715 # exactly as shown
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
716 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
717 # Notes:
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
718 # . StereoParity: 1 - ClockwiseStereo, 2 - AntiClockwiseStereo; 3 - Either; 0 - none. These
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
719 # values determine chirality around the chiral center; a non-zero value indicates atom
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
720 # has been marked as chiral center.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
721 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdAtomLine {
  my($Line) = @_;

  # Split one fixed-width CTAB atom block line into its leading fields and return
  # them as (AtomSymbol, AtomX, AtomY, AtomZ, MassDifference, Charge, StereoParity).
  #
  # Every field starts out as an empty string so that a field missing from a
  # truncated line is reported as '' and never as undef.
  my($AtomX, $AtomY, $AtomZ, $AtomSymbol, $MassDifference, $Charge, $StereoParity) = ('') x 7;

  if (length($Line) > 31) {
    # Three 10 character coordinates, one skipped column, then symbol, mass
    # difference, charge and stereo parity.
    ($AtomX, $AtomY, $AtomZ, $AtomSymbol, $MassDifference, $Charge, $StereoParity) = unpack("A10A10A10xA3A2A3A3", $Line);
  }
  else {
    # The atom symbol begins at the 32nd character, so a line this short can
    # only carry coordinates. Assign just those and leave the atom symbol at
    # its '' default instead of overwriting it with undef.
    ($AtomX, $AtomY, $AtomZ) = unpack("A10A10A10", $Line);
  }
  return ($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge, $StereoParity);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
735
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
736 # Map MDL charge value used in SD and MOL files to internal charge used by MayaChemTools.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
737 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
sub MDLChargeToInternalCharge {
  my($MDLCharge) = @_;

  # Ordered (MDL charge value, internal charge) pairs. The MDL value is matched
  # numerically, in this order, and the first hit is returned.
  my @ChargePairs = ([0, 0], [1, 3], [2, 2], [3, 1], [5, -1], [6, -2], [7, -3]);

  foreach my $Pair (@ChargePairs) {
    my($MDLValue, $InternalValue) = @{$Pair};
    return $InternalValue if ($MDLCharge == $MDLValue);
  }

  # Anything else maps to an internal charge of 0. MDL value 4 ("doublet
  # radical") is an expected case and stays silent; any other value is reported.
  unless ($MDLCharge == 4) {
    carp "Warning: MDLChargeToInternalCharge: MDL charge value, $MDLCharge, is not supported: An internal charge value, 0, has been assigned...";
  }
  return 0;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
759
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
760 # Map internal charge used by MayaChemTools to MDL charge value used in SD and MOL files.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
761 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
sub InternalChargeToMDLCharge {
  my($InternalCharge) = @_;

  # Ordered (internal charge, MDL charge value) pairs; matched numerically and
  # the first hit is returned.
  my @ChargePairs = ([3, 1], [2, 2], [1, 3], [-1, 5], [-2, 6], [-3, 7]);

  foreach my $Pair (@ChargePairs) {
    my($InternalValue, $MDLValue) = @{$Pair};
    return $MDLValue if ($InternalCharge == $InternalValue);
  }

  # Every other internal charge, including 0, is written as MDL charge value 0.
  return 0;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
779
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
780 # Ctab lines: Bond block
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
781 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
782 # Format: 111222tttsssxxxrrrccc
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
783 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
784 # 111: First atom number.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
785 # 222: Second atom number.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
786 # ttt: Bond type. 1 = Single, 2 = Double, 3 = Triple, 4 = Aromatic, 5 = Single or Double,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
787 # 6 = Single or Aromatic, 7 = Double or Aromatic, 8 = Any
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
788 # sss: Bond stereo. Single bonds: 0 = not stereo, 1 = Up, 4 = Either, 6 = Down,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
789 # Double bonds: 0 = Use x-, y-, z-coords from atom block to determine cis or trans,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
790 # 3 = Cis or trans (either) double bond
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
791 # xxx: Not used
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
792 # rrr: Bond topology. 0 = Either, 1 = Ring, 2 = Chain
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
793 # ccc: Reacting center status. 0 = unmarked, 1 = a center, -1 = not a center,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
794 # Additional: 2 = no change,4 = bond made/broken, 8 = bond order changes 12 = 4+8
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
795 # (both made/broken and changes); 5 = (4 + 1), 9 = (8 + 1), and 13 = (12 + 1) are also possible
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
796 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdBondLine {
  my($Line) = @_;

  # Cut the first four 3 character columns out of a bond block line: first atom
  # number, second atom number, bond type and bond stereo.
  my @BondFields = unpack("A3A3A3A3", $Line);

  # Drop every space left inside a column.
  foreach my $Field (@BondFields) {
    $Field =~ tr/ //d;
  }

  # Return an explicit four element slice rather than the array itself so that
  # a scalar context caller still receives the last field.
  return @BondFields[0 .. 3];
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
804
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
805 # Map MDL bond type value used in SD and MOL files to internal bond order and bond types
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
806 # values used by MayaChemTools...
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
807 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
sub MDLBondTypeToInternalBondOrder {
  my($MDLBondType) = @_;

  # Returns (InternalBondOrder, InternalBondType) for an MDL bond type value.
  #
  # Each row is [MDL bond type, internal bond order, internal bond type]. MDL
  # aromatic bonds (4) get the internal order 1.5. The query-only types 5 to 8
  # get a concrete order (1, 1, 2 and 1) plus a type name that records which
  # query type they came from.
  my @BondTypeRows = (
    [1, 1,   'Single'],
    [2, 2,   'Double'],
    [3, 3,   'Triple'],
    [4, 1.5, 'Aromatic'],
    [5, 1,   'SingleOrDouble'],
    [6, 1,   'SingleOrAromatic'],
    [7, 2,   'DoubleOrAromatic'],
    [8, 1,   'Any'],
  );

  foreach my $Row (@BondTypeRows) {
    my($MDLValue, $InternalBondOrder, $InternalBondType) = @{$Row};
    return ($InternalBondOrder, $InternalBondType) if ($MDLBondType == $MDLValue);
  }

  # Unsupported value: fall back to a single bond. The warning reports the bond
  # order that is actually assigned, which is 1.
  carp "Warning: MDLBondTypeToInternalBondOrder: MDL bond type value, $MDLBondType, is not supported: An internal bond order value, 1, has been assigned...";

  return (1, 'Single');
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
838
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
839 # Map internal bond order and bond type values used by MayaChemTools to MDL bond type value used
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
840 # in SD and MOL files...
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
841 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
sub InternalBondOrderToMDLBondType {
  my($InternalBondOrder, $InternalBondType) = @_;

  # Returns the MDL bond type value for an internal bond order and bond type.
  #
  # A missing bond type is treated as an empty string so the pattern matches
  # below never operate on undef.
  $InternalBondType = '' unless defined $InternalBondType;

  if ($InternalBondOrder == 1) {
    # Order 1 is shared by plain single bonds and three query bond types; the
    # bond type name selects between them. The result of this selection is
    # returned directly and is not reset to 1 afterwards.
    return 5 if ($InternalBondType =~ /^SingleOrDouble$/i);
    return 6 if ($InternalBondType =~ /^SingleOrAromatic$/i);
    return 8 if ($InternalBondType =~ /^Any$/i);
    return 1;
  }

  if ($InternalBondOrder == 2) {
    return ($InternalBondType =~ /^DoubleOrAromatic$/i) ? 7 : 2;
  }

  return 3 if ($InternalBondOrder == 3);
  return 4 if ($InternalBondOrder == 1.5);

  # 'Any' is honored for bond orders not handled above as well.
  return 8 if ($InternalBondType =~ /^Any$/i);

  carp "Warning: InternalBondOrderToMDLBondType: Internal bond order and type values, $InternalBondOrder and $InternalBondType, don't match any valid MDL bond type: MDL bond type value, 1, has been assigned...";

  return 1;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
882
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
883 # Third line: Comments - A blank line is also allowed.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdCommentsLine {
  my $Line = shift;

  # The comments are whatever sits in the first 80 characters of the line;
  # trailing whitespace is dropped by the "A" unpack format.
  return scalar(unpack("A80", $Line));
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
892
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
893 # Map MDL bond stereo value used in SD and MOL files to internal bond stereochemistry values used by MayaChemTools...
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
894 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
sub MDLBondStereoToInternalBondStereochemistry {
  my($MDLBondStereo) = @_;

  # Ordered (MDL bond stereo value, internal stereochemistry name) pairs. The
  # MDL value is matched numerically, in this order, and the first hit wins.
  my @StereoPairs = (
    [1, 'Up'],
    [4, 'UpOrDown'],
    [6, 'Down'],
    [3, 'CisOrTrans'],
    [0, 'None'],
  );

  foreach my $Pair (@StereoPairs) {
    my($MDLValue, $InternalName) = @{$Pair};
    return $InternalName if ($MDLBondStereo == $MDLValue);
  }

  # Unsupported value: report it and return an empty stereochemistry. The
  # function name inside the warning text is reproduced unchanged.
  carp "Warning: MDLBondStereoToInternalBondType: MDL bond stereo value, $MDLBondStereo, is not supported: It has been ignored and bond order would be used to determine bond type...";

  return '';
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
913
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
914 # Map internal bond stereochemistry values used by MayaChemTools to MDL bond stereo value used in SD and MOL files...
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
915 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
sub InternalBondStereochemistryToMDLBondStereo {
  my($InternalBondStereo) = @_;

  # Returns the MDL bond stereo value for an internal stereochemistry name.
  #
  # A missing name is treated as an empty string, which maps to 0 below.
  $InternalBondStereo = '' unless defined $InternalBondStereo;

  # All four names are matched case-insensitively so that every name is
  # handled the same way.
  return 1 if ($InternalBondStereo =~ /^Up$/i);
  return 4 if ($InternalBondStereo =~ /^UpOrDown$/i);
  return 6 if ($InternalBondStereo =~ /^Down$/i);
  return 3 if ($InternalBondStereo =~ /^CisOrTrans$/i);

  # Any other name, including 'None', is written as MDL bond stereo value 0.
  return 0;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
932
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
933 # Fourth line: Counts
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
934 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
935 # Format: aaabbblllfffcccsssxxxrrrpppiiimmmvvvvvv
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
936 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
937 # aaa: number of atoms; bbb: number of bonds; lll: number of atom lists; fff: (obsolete)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
938 # ccc: chiral flag: 0=not chiral, 1=chiral; sss: number of stext entries; xxx,rrr,ppp,iii:
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
939 # (obsolete); mmm: number of lines of additional properties, including the M END line, No
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
940 # longer supported, default is set to 999; vvvvvv: version
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
941
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdCountsLine {
  my($Line) = @_;
  my $LineLength = length($Line);

  # Returns (AtomCount, BondCount, ChiralFlag, PropertyCount, Version).
  #
  # Start from the values used for the shortest form of the line; longer lines
  # overwrite whichever of these they actually carry.
  my($AtomCount, $BondCount);
  my($ChiralFlag, $PropertyCount, $Version) = ("0", "999", "v2000");

  if ($LineLength >= 39) {
    # Full counts line: every returned field is read from the line.
    ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version) = unpack("A3A3x3x3A3x3x3x3x3x3A3A6", $Line);
  }
  elsif ($LineLength >= 15) {
    # Long enough to reach the chiral flag, but no further.
    ($AtomCount, $BondCount, $ChiralFlag) = unpack("A3A3x3x3A3", $Line);
  }
  else {
    # Atom and bond counts only.
    ($AtomCount, $BondCount) = unpack("A3A3", $Line);
  }

  # V3000 connection tables are not handled by this module, so stop here with
  # an error rather than leaving the decision to callers.
  croak "Error: SDFileUtil::ParseCmpdCountsLine: The Extended Connection Table (V3000) format in MDL MOL and SD files is not supported by the current release of MayaChemTools..." if ($Version =~ /V3000/i);

  return ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
967
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
968 # Second line: Misc info
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
969 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
970 # Format: IIPPPPPPPPMMDDYYHHmmddSSssssssssssEEEEEEEEEEEERRRRRR
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
971 # A2A8 A10 A2I2A10 A12 A6
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
972 # User's first and last initials (I), program name (P), date/time (M/D/Y,H:m),
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
973 # dimensional codes - 2D or 3D (d),scaling factors (S, s), energy (E) if modeling program input,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
974 # internal registry number (R) if input through MDL form. A blank line is also allowed.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdMiscInfoLine {
  my($Line) = @_;

  # Fixed-width fields, in order: user initials (2), program name (8),
  # date/time (10), dimensional code (2), first scaling factor (2), second
  # scaling factor (10), energy (12) and registry number (6).
  my @MiscInfo = unpack("A2A8A10A2A2A10A12A6", $Line);

  # Return an explicit eight element slice rather than the array itself so that
  # a scalar context caller still receives the last field.
  return @MiscInfo[0 .. 7];
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
982
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
983 # First line: Molecule name. This line is unformatted, but like all other lines in a
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
984 # molfile may not extend beyond column 80. A blank line is also allowed.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdMolNameLine {
  my $Line = shift;

  # The molecule name is whatever sits in the first 80 characters of the line;
  # trailing whitespace is dropped by the "A" unpack format.
  return scalar(unpack("A80", $Line));
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
993
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
994 # Parse atom alias property line in CTAB generic properties block.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
995 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
996 # Atom alias property line format:
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
997 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
998 # A aaa
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
999 # x...
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1000 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1001 # aaa: Atom number
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1002 # x: Atom alias in next line
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1003 #
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
sub ParseCmpdAtomAliasPropertyLine {
  my($Line, $NextLine) = @_;

  # Returns (AtomNumber, AtomAlias). The atom number is the second whitespace
  # separated token on the property line; the leading label token is not
  # needed. The alias is the following line, taken as is.
  my(undef, $AtomNumber) = split(' ', $Line);
  my $AtomAlias = $NextLine;

  # Warn only when no alias text was supplied at all. Testing for
  # undefined-or-empty instead of plain truth keeps an alias of "0" from being
  # reported as missing.
  if (!defined($AtomAlias) || $AtomAlias eq '') {
    carp "Warning: _ParseCmpdAtomAliasPropertyLine: No atom alias value specified on the line following atom alias property line...";
  }

  return ($AtomNumber, $AtomAlias);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1017
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Parse charge property line in CTAB generic properties block.
#
# Charge property line format:
#
# M CHGnn8 aaa vvv ...
#
# nn8: Number of value pairs. Maximum of 8 pairs allowed.
# aaa: Atom number
# vvv: -15 to +15. Default of 0 = uncharged atom. When present, this property
#      supersedes all charge and radical values in the atom block, forcing a 0
#      charge on all atoms not listed in an M CHG or M RAD line.
#
# Takes the property line and hands it, tagged as 'Charge', to
# _ParseCmpdGenericPropertyLine; whatever that helper returns is returned
# unchanged.
#
sub ParseCmpdChargePropertyLine {
  my $PropertyLine = shift;

  return _ParseCmpdGenericPropertyLine('Charge', $PropertyLine);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1035
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1036
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Parse isotope property line in CTAB generic properties block.
#
# Isotope property line format:
#
# M ISOnn8 aaa vvv ...
#
# nn8: Number of value pairs. Maximum of 8 pairs allowed.
# aaa: Atom number
# vvv: Absolute mass of the atom isotope as a positive integer. When present,
#      this property supersedes all isotope values in the atom block. Default
#      (no entry) means natural abundance. The difference between this absolute
#      mass value and the natural abundance value specified in the PTABLE.DAT
#      file must be within the range of -18 to +12.
#
# Notes:
#   . Values correspond to mass numbers...
#
# Takes the property line and hands it, tagged as 'Isotope', to
# _ParseCmpdGenericPropertyLine; whatever that helper returns is returned
# unchanged.
#
sub ParseCmpdIsotopePropertyLine {
  my $PropertyLine = shift;

  return _ParseCmpdGenericPropertyLine('Isotope', $PropertyLine);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1059
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Parse radical property line in CTAB generic properties block.
#
# Radical property line format:
#
# M RADnn8 aaa vvv ...
#
# nn8: Number of value pairs. Maximum of 8 pairs allowed.
# aaa: Atom number
# vvv: Default of 0 = no radical, 1 = singlet, 2 = doublet, 3 = triplet. When
#      present, this property supersedes all charge and radical values in the
#      atom block, forcing a 0 (zero) charge and radical on all atoms not listed
#      in an M CHG or M RAD line.
#
# Takes the property line and hands it, tagged as 'Radical', to
# _ParseCmpdGenericPropertyLine; whatever that helper returns is returned
# unchanged.
#
sub ParseCmpdRadicalPropertyLine {
  my $PropertyLine = shift;

  return _ParseCmpdGenericPropertyLine('Radical', $PropertyLine);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1078
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Map MDL radical stereo value used in SD and MOL files to internal spin
# multiplicity values used by MayaChemTools...
#
# The values 0, 1, 2 and 3 map onto themselves. Any other value produces a
# warning through carp and an empty string as the return value.
#
sub MDLRadicalToInternalSpinMultiplicity {
  my($MDLRadical) = @_;

  # Compare numerically against each supported value in ascending order and
  # stop at the first one that matches...
  for my $SupportedValue (0, 1, 2, 3) {
    if ($MDLRadical == $SupportedValue) {
      return $SupportedValue;
    }
  }

  carp "Warning: MDLRadicalToInternalSpinMultiplicity: MDL radical value, $MDLRadical, specifed on line M RAD is not supported...";

  return '';
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1097
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Map internal spin multiplicity values used by MayaChemTools to MDL radical
# stereo value used in SD and MOL files...
#
# The values 1, 2 and 3 map onto themselves; every other value maps to 0.
#
sub InternalSpinMultiplicityToMDLRadical {
  my($InternalSpinMultiplicity) = @_;

  # Compare numerically against each value that has an MDL counterpart...
  for my $SupportedValue (1, 2, 3) {
    if ($InternalSpinMultiplicity == $SupportedValue) {
      return $SupportedValue;
    }
  }

  # Anything else is written out as "no radical"...
  return 0;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1114
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Process generic CTAB property line...
#
# Parameters:
#   $PropertyName - name used only in the warning text (callers in this file
#                   pass 'Charge', 'Isotope' or 'Radical').
#   $Line         - the property line to parse.
#
# The line is split on whitespace. The first three fields are taken as the
# line label, the property label and the declared number of value pairs; all
# remaining fields are returned as a flat list of atom number and value
# pairs. A warning is issued through carp when the declared number of pairs
# differs from the number actually found.
#
sub _ParseCmpdGenericPropertyLine {
  my($PropertyName, $Line) = @_;

  my @Fields = split(' ', $Line);

  # Peel the three leading fields off; what is left are the value pairs...
  my($Label, $PropertyLabel, $ValuesCount) = splice(@Fields, 0, 3);
  my $ValuePairsCount = (scalar @Fields)/2;

  carp "Warning: _ParseCmpdGenericPropertyLine: Number of atom number and $PropertyName value paris specified on $Label $PropertyLabel property line, $ValuePairsCount, does not match expected value of $ValuesCount..."
    if ($ValuesCount != $ValuePairsCount);

  return (@Fields);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1130
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Generic CTAB property lines for charge, isotope and radical properties...
#
# Parameters:
#   $PropertyName          - 'Charge', 'Isotope' or 'Radical' (case insensitive).
#   $PropertyValuePairsRef - reference to a flat array of atom number and
#                            property value pairs.
#
# Returns a list of property lines, each holding at most 8 pairs. An unknown
# property name produces a warning through carp and an empty list. A trailing
# unpaired element in the array is ignored.
#
# Each line is laid out in the fixed width form used by the CTfile properties
# block: "M  XXX", the pair count right justified in 3 columns, then each atom
# number and value right justified in 3 columns and preceded by a space.
#
sub _GenerateCmpdGenericPropertyLines {
  my($PropertyName, $PropertyValuePairsRef) = @_;
  my($Index, $PropertyLabel, $Line, $PropertyCount, $MaxPairsPerLine, $AtomNum, $PropertyValue, @PropertyLines);

  @PropertyLines = ();
  NAME: {
    if ($PropertyName =~ /^Charge$/i) { $PropertyLabel = "M  CHG"; last NAME; }
    if ($PropertyName =~ /^Isotope$/i) { $PropertyLabel = "M  ISO"; last NAME; }
    if ($PropertyName =~ /^Radical$/i) { $PropertyLabel = "M  RAD"; last NAME; }
    carp "Warning: _GenerateCmpdGenericPropertyLines: Unknown property name, $PropertyName, specified...";
    return @PropertyLines;
  }

  # A maximum of 8 property pair values allowed per line...
  $MaxPairsPerLine = 8;
  $PropertyCount = 0;
  $Line = '';
  for ($Index = 0; $Index < $#{$PropertyValuePairsRef}; $Index += 2) {
    # Flush as soon as the line is full, that is before a ninth pair would be
    # appended, so that the count written on the line always matches the number
    # of pairs it carries...
    if ($PropertyCount >= $MaxPairsPerLine) {
      push @PropertyLines, sprintf("%s%3i%s", $PropertyLabel, $PropertyCount, $Line);

      $PropertyCount = 0;
      $Line = '';
    }
    $PropertyCount++;
    $AtomNum = $PropertyValuePairsRef->[$Index];
    $PropertyValue = $PropertyValuePairsRef->[$Index + 1];
    $Line .= sprintf " %3i %3i", $AtomNum, $PropertyValue;
  }

  # Write out whatever is left over in the last, possibly partial, line...
  if ($Line) {
    push @PropertyLines, sprintf("%s%3i%s", $PropertyLabel, $PropertyCount, $Line);
  }
  return @PropertyLines;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1169
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
#
# Read compound data into a string and return its value
#
# Parameters:
#   $SDFileRef - file handle to read lines from.
#
# Lines are read up to and including the first line starting with "$$$$", or
# up to end of file when no such line is found. Windows and Mac line endings
# are converted to "\n" and the new line char after "$$$$" is dropped. An
# empty string is returned when nothing could be read.
#
sub ReadCmpdString {
  my($SDFileRef) = @_;
  my($CmpdString, $Line);

  $CmpdString = "";

  # Read into a lexical instead of $_ so that the caller's $_ is left alone...
  LINE: while (defined($Line = <$SDFileRef>)) {
    # Change Windows and Mac new line char to UNIX...
    $Line =~ s/(\r\n)|(\r)/\n/g;

    if ($Line =~ /^\$\$\$\$/) {
      # Take out any new line char at the end by explicitly removing it instead of using
      # chomp, which might not always work correctly on files generated on a system
      # with a value of input line separator different from the current system...
      $Line =~ s/\n$//g;

      # Doesn't hurt to chomp...
      chomp $Line;

      $CmpdString .= $Line;
      last LINE;
    }
    $CmpdString .= $Line;
  }
  return $CmpdString;
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1199
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
# Find out the number of fragments in the compound. And for a compound with
# more than one fragment, remove all the others besides the largest one.
#
# Parameters:
#   $CmpdLines - reference to an array holding the lines of one compound:
#                three header lines, the counts line, atom lines, bond lines,
#                property lines up to M  END and then the data fields.
#
# Returns the list ($FragmentCount, $Fragments, $WashedCmpdString). The first
# two values come straight from GetCmpdFragments. $WashedCmpdString is an
# empty string unless the fragment count is greater than 1, in which case it
# holds the compound rebuilt around one fragment and joined with "\n".
#
# NOTE(review): the fragment kept is the first line of $Fragments; this
# assumes GetCmpdFragments lists the largest fragment first with its atom
# numbers sorted in ascending order - confirm against that function.
#
sub WashCmpd {
  my($CmpdLines) = @_;
  my($WashedCmpdString, $FragmentCount, $Fragments);

  $WashedCmpdString = "";
  ($FragmentCount, $Fragments) = GetCmpdFragments($CmpdLines);
  unless ($FragmentCount > 1) {
    # Nothing to wash...
    return ($FragmentCount, $Fragments, $WashedCmpdString);
  }

  my(@AllFragments, @LargestFragment, %LargestFragmentAtoms, @WashedCmpdLines, $Index, $LineIndex, $AtomCount, $BondCount, $NewAtomCount, $NewBondCount, $ChiralFlag, $FirstBondLineIndex, $FirstPropertyLineIndex, $MENDLineIndex);

  # Fragments are separated by new lines and atom numbers within a fragment by spaces...
  @AllFragments = split "\n", $Fragments;
  @LargestFragment = split " ", $AllFragments[0];

  # Map old atom numbers to new atom numbers as the fragment atom numbers are sorted
  # from lowest to highest old atom numbers...
  %LargestFragmentAtoms = ();
  for $Index (0 .. $#LargestFragment) {
    $LargestFragmentAtoms{$LargestFragment[$Index]} = $Index + 1;
  }

  # Header lines and counts line; the counts line is replaced at the end...
  @WashedCmpdLines = @{$CmpdLines}[0 .. 3];
  ($AtomCount, $BondCount, $ChiralFlag) = ParseCmpdCountsLine($CmpdLines->[3]);
  $NewAtomCount = @LargestFragment;
  $NewBondCount = 0;

  $FirstBondLineIndex = 4 + $AtomCount;
  $FirstPropertyLineIndex = $FirstBondLineIndex + $BondCount;

  # Retrieve the largest fragment atom lines; atom N is on line index N + 3...
  for ($LineIndex = 4; $LineIndex < $FirstBondLineIndex; $LineIndex++) {
    if ($LargestFragmentAtoms{$LineIndex - 3}) {
      push @WashedCmpdLines, $CmpdLines->[$LineIndex];
    }
  }

  # Retrieve the largest fragment bond lines...
  for ($LineIndex = $FirstBondLineIndex; $LineIndex < $FirstPropertyLineIndex; $LineIndex++) {
    my($FirstAtomNum, $SecondAtomNum, $BondType, $BondStereo) = ParseCmpdBondLine($CmpdLines->[$LineIndex]);
    if ($LargestFragmentAtoms{$FirstAtomNum} && $LargestFragmentAtoms{$SecondAtomNum}) {
      $NewBondCount++;
      # Set up bond line with new atom number mapping...
      my $BondLine = GenerateCmpdBondLine($LargestFragmentAtoms{$FirstAtomNum}, $LargestFragmentAtoms{$SecondAtomNum}, $BondType, $BondStereo);
      push @WashedCmpdLines, $BondLine;
    }
  }

  # Get property lines for CHG, ISO and RAD labels along with atom alias lines
  # and map the old atom numbers to new atom numbers. Other property lines
  # before the M  END line are skipped as atom numbers for other properties
  # might not be valid anymore...
  #
  # The CTfile format puts two spaces after the leading M; the patterns accept
  # any run of whitespace there so that both spellings are recognized...
  #
  $MENDLineIndex = $LineIndex;
  LINE: for ($LineIndex = $FirstPropertyLineIndex; $LineIndex < @$CmpdLines; $LineIndex++) {
    my $Line = $CmpdLines->[$LineIndex];
    if ($Line =~ /^M\s+END/i) {
      push @WashedCmpdLines, "M  END";
      $MENDLineIndex = $LineIndex;
      last LINE;
    }

    # Work out the kind of property line once and parse it accordingly...
    my($PropertyKind, @ValuePairs);
    @ValuePairs = ();
    if ($Line =~ /^M\s+CHG/i) {
      $PropertyKind = 'Charge';
      @ValuePairs = ParseCmpdChargePropertyLine($Line);
    }
    elsif ($Line =~ /^M\s+RAD/i) {
      $PropertyKind = 'Radical';
      @ValuePairs = ParseCmpdRadicalPropertyLine($Line);
    }
    elsif ($Line =~ /^M\s+ISO/i) {
      $PropertyKind = 'Isotope';
      @ValuePairs = ParseCmpdIsotopePropertyLine($Line);
    }
    elsif ($Line =~ /^A /i) {
      # The alias itself is on the next line, which is consumed here...
      $PropertyKind = 'AtomAlias';
      $LineIndex++;
      @ValuePairs = ParseCmpdAtomAliasPropertyLine($Line, $CmpdLines->[$LineIndex]);
    }
    else {
      next LINE;
    }

    if (!@ValuePairs) {
      next LINE;
    }

    # Collect values for valid atom numbers with mapping to new atom numbers...
    my @NewValuePairs = ();
    my $ValuePairIndex;
    VALUEINDEX: for ($ValuePairIndex = 0; $ValuePairIndex < $#ValuePairs; $ValuePairIndex += 2) {
      my($AtomNum, $Value) = @ValuePairs[$ValuePairIndex, $ValuePairIndex + 1];
      if (!exists $LargestFragmentAtoms{$AtomNum}) {
        next VALUEINDEX;
      }
      push @NewValuePairs, ($LargestFragmentAtoms{$AtomNum}, $Value);
    }
    if (!@NewValuePairs) {
      next LINE;
    }

    my @NewPropertyLines = ();
    if ($PropertyKind eq 'Charge') {
      @NewPropertyLines = GenerateCmpdChargePropertyLines(\@NewValuePairs);
    }
    elsif ($PropertyKind eq 'Radical') {
      @NewPropertyLines = GenerateCmpdRadicalPropertyLines(\@NewValuePairs);
    }
    elsif ($PropertyKind eq 'Isotope') {
      @NewPropertyLines = GenerateCmpdIsotopePropertyLines(\@NewValuePairs);
    }
    elsif ($PropertyKind eq 'AtomAlias') {
      @NewPropertyLines = GenerateCmpdAtomAliasPropertyLines(\@NewValuePairs);
    }
    push @WashedCmpdLines, @NewPropertyLines;
  }

  # Retrieve rest of the data label and value property data...
  for ($LineIndex = (1 + $MENDLineIndex); $LineIndex < @$CmpdLines; $LineIndex++) {
    push @WashedCmpdLines, $CmpdLines->[$LineIndex];
  }

  # Update atom and bond count line...
  $WashedCmpdLines[3] = GenerateCmpdCountsLine($NewAtomCount, $NewBondCount, $ChiralFlag);

  $WashedCmpdString = join "\n", @WashedCmpdLines;

  return ($FragmentCount, $Fragments, $WashedCmpdString);
}
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1324
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1325 1;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1326
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1327 __END__
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1328
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1329 =head1 NAME
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1330
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1331 SDFileUtil
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1332
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1333 =head1 SYNOPSIS
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1334
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1335 use SDFileUtil ;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1336
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1337 use SDFileUtil qw(:all);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1338
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1339 =head1 DESCRIPTION
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1340
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1341 B<SDFileUtil> module provides the following functions:
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1342
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1343 GenerateCmpdAtomAliasPropertyLines, GenerateCmpdAtomLine, GenerateCmpdBondLine,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1344 GenerateCmpdChargePropertyLines, GenerateCmpdCommentsLine, GenerateCmpdCountsLine,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1345 GenerateCmpdDataHeaderLabelsAndValuesLines, GenerateCmpdIsotopePropertyLines,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1346 GenerateCmpdMiscInfoLine, GenerateCmpdMolNameLine,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1347 GenerateCmpdRadicalPropertyLines, GenerateEmptyCtabBlockLines,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1348 GenerateMiscLineDateStamp, GetAllAndCommonCmpdDataHeaderLabels,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1349 GetCmpdDataHeaderLabels, GetCmpdDataHeaderLabelsAndValues, GetCmpdFragments,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1350 GetCtabLinesCount, GetInvalidAtomNumbers, GetUnknownAtoms,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1351 InternalBondOrderToMDLBondType, InternalBondStereochemistryToMDLBondStereo,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1352 InternalChargeToMDLCharge, InternalSpinMultiplicityToMDLRadical, IsCmpd2D,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1353 IsCmpd3D, MDLBondStereoToInternalBondStereochemistry,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1354 MDLBondTypeToInternalBondOrder, MDLChargeToInternalCharge,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1355 MDLRadicalToInternalSpinMultiplicity, ParseCmpdAtomAliasPropertyLine,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1356 ParseCmpdAtomLine, ParseCmpdBondLine, ParseCmpdChargePropertyLine,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1357 ParseCmpdCommentsLine, ParseCmpdCountsLine, ParseCmpdIsotopePropertyLine,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1358 ParseCmpdMiscInfoLine, ParseCmpdMolNameLine, ParseCmpdRadicalPropertyLine,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1359 ReadCmpdString, RemoveCmpdDataHeaderLabelAndValue, WashCmpd
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1360
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1361 =head1 METHODS
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1362
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1363 =over 4
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1364
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1365 =item B<GenerateCmpdAtomAliasPropertyLines>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1366
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1367 @Lines = GenerateCmpdAtomAliasPropertyLines($AliasValuePairsRef);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1368
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1369 Returns formatted atom alias property lines corresponding to successive pairs
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1370 of atom number and alias values specified by a reference to an array. Two lines
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1371 are generated for each atom number and alias value pair: First line - A <AtomNum>;
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1372 Second line:<AtomAlias>.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1373
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1374 =item B<GenerateCmpdAtomLine>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1375
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1376 $Line = GenerateCmpdAtomLine($AtomSymbol, $AtomX, $AtomY,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1377 $AtomZ, [$MassDifference, $Charge, $StereoParity]);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1378
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1379 Returns a formatted atom data line containing all the input values.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1380
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1381 =item B<GenerateCmpdBondLine>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1382
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1383 $Line = GenerateCmpdBondLine($FirstAtomNum, $SecondAtomNum,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1384 $BondType, [$BondStereo]);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1385
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1386 Returns a formatted bond data line containing all the input values.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1387
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1388 =item B<GenerateCmpdChargePropertyLines>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1389
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1390 @Lines = GenerateCmpdChargePropertyLines($ChargeValuePairsRef);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1391
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1392 Returns formatted M CHG property lines corresponding to successive pairs of
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1393 atom number and charge values specified by a reference to an array.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1394
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1395 =item B<GenerateCmpdCommentsLine>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1396
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1397 $Line = GenerateCmpdCommentsLine($Comments);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1398
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1399 Returns a formatted comments data line.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1400
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1401 =item B<GenerateCmpdCountsLine>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1402
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1403 $Line = GenerateCmpdCountsLine($AtomCount, $BondCount,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1404 $ChiralFlag, [$PropertyCount, $Version]);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1405
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1406 Returns a formatted line containing all the input values. The default values of 999
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1407 and V2000 are used for I<PropertyCount> and I<Version>.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1408
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1409 =item B<GenerateCmpdDataHeaderLabelsAndValuesLines>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1410
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1411 @Lines = GenerateCmpdDataHeaderLabelsAndValuesLines(
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1412 $DataHeaderLabelsRef, $DataHeaderLabelsAndValuesRef,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1413 [$SortDataLabels]);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1414
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1415 Returns formatted data lines containing header label and values lines corresponding to
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1416 all data header labels in array reference I<DataHeaderLabelsRef> with values in hash
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1417 reference I<DataHeaderLabelsAndValuesRef>. By default, data header labels are
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1418 not sorted and correspond to the label order in array reference I<DataHeaderLabelsRef>.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1419
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1420 =item B<GenerateCmpdIsotopePropertyLines>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1421
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1422 @Lines = GenerateCmpdIsotopePropertyLines($IsotopeValuePairsRef);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1423
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1424 Returns formatted M ISO property lines corresponding to successive pairs of
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1425 atom number and isotope values specified by a reference to an array.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1426
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1427 =item B<GenerateCmpdMiscInfoLine>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1428
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1429 $Line = GenerateCmpdMiscInfoLine([$ProgramName, $UserInitial,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1430 $Code]);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1431
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1432 Returns a formatted line containing specified user initial, program name, date and code.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1433 Default values are: I<ProgramName - MayaChem; UserInitial - NULL; Code - 2D>.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1434
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1435 =item B<GenerateCmpdMolNameLine>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1436
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1437 $Line = GenerateCmpdMolNameLine($MolName);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1438
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1439 Returns a formatted molecule name data line.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1440
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1441 =item B<GenerateCmpdRadicalPropertyLines>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1442
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1443 @Lines = GenerateCmpdRadicalPropertyLines($RadicalValuePairsRef);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1444
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1445 Returns formatted M RAD property lines corresponding to successive pairs of
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1446 atom number and multiplicity values specified by a reference to an array.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1447
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1448 =item B<GenerateEmptyCtabBlockLines>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1449
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1450 $Lines = GenerateEmptyCtabBlockLines([$Date]);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1451
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1452 Returns formatted lines representing empty CTAB block.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1453
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1454 =item B<GenerateMiscLineDateStamp>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1455
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1456 $Line = GenerateMiscLineDateStamp();
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1457
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1458 Returns date stamp for misc line.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1459
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1460 =item B<GetAllAndCommonCmpdDataHeaderLabels>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1461
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1462 ($CmpdCount, $DataFieldLabelsArrayRef,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1463 $CommonDataFieldLabelsArrayRef) =
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1464 GetAllAndCommonCmpdDataHeaderLabels(\*SDFILE);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1465
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1466 Returns number of compounds, a reference to an array containing all unique data header
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1467 label and a reference to an array containing common data field labels for all compounds
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1468 in SD file.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1469
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1470 =item B<GetCmpdDataHeaderLabels>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1471
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1472 (@Labels) = GetCmpdDataHeaderLabels(\@CmpdLines);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1473
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1474 Returns an array containing data header labels for a compound.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1475
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1476 =item B<GetCmpdDataHeaderLabelsAndValues>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1477
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1478 (%DataValues) = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1479
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1480 Returns a hash containing data header labels and values for a compound.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1481
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1482 =item B<GetCmpdFragments>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1483
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1484 ($FragmentCount, $FragmentString) = GetCmpdFragments(\@CmpdLines);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1485
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1486 Figures out the number of disconnected fragments and returns their values along
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1487 with the atom numbers in a string delimited by new line character. Fragment data
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1488 in B<FragmentString> is sorted based on its size.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1489
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1490 =item B<GetCtabLinesCount>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1491
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1492 $CtabLinesCount = GetCtabLinesCount(\@CmpdLines);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1493
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1494 Returns number of lines present between the 4th line and the line containing "M END".
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1495
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1496 =item B<GetInvalidAtomNumbers>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1497
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1498 ($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines) =
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1499 GetInvalidAtomNumbers(\@CmpdLines);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1500
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1501 Returns a list of values containing information about invalid atom numbers present
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1502 in block or atom property lines.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1503
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1504 =item B<GetUnknownAtoms>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1505
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1506 ($UnknownAtomCount, $UnknownAtoms, $UnknownAtomLines) =
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1507 GetUnknownAtoms(\@CmpdLines);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1508
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1509 Returns a list of values containing information about atoms which contain special element
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1510 symbols not present in the periodic table.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1511
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1512 =item B<InternalBondOrderToMDLBondType>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1513
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1514 $MDLBondType = InternalBondOrderToMDLBondType($InternalBondOrder);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1515
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1516 Returns value of I<MDLBondType> corresponding to I<InternalBondOrder>.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1517
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1518 InternalBondOrder MDLBondType
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1519
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1520 1 1
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1521 2 2
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1522 3 3
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1523 1.5 4
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1524
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1525 =item B<InternalBondStereochemistryToMDLBondStereo>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1526
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1527 $MDLBondStereo = InternalBondStereochemistryToMDLBondStereo(
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1528 $InternalBondStereo);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1529
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1530 Returns value of I<MDLBondStereo> corresponding to I<InternalBondStereo> using following
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1531 mapping:
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1532
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1533 InternalBondStereo MDLBondStereo
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1534
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1535 Up 1
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1536 UpOrDown 4
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1537 Down 6
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1538 CisOrTrans 3
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1539 Other 0
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1540
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1541 =item B<InternalChargeToMDLCharge>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1542
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1543 $MDLCharge = InternalChargeToMDLCharge($InternalCharge);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1544
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1545 Returns value of I<MDLCharge> corresponding to I<InternalCharge> using following
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1546 mapping:
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1547
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1548 InternalCharge MDLCharge
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1549
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1550 3 1
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1551 2 2
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1552 1 3
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1553 -1 5
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1554 -2 6
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1555 -3 7
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1556
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1557 =item B<InternalSpinMultiplicityToMDLRadical>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1558
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1559 $MDLRadical = InternalSpinMultiplicityToMDLRadical(
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1560 $InternalSpinMultiplicity);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1561
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1562 Returns value of I<MDLRadical> corresponding to I<InternalSpinMultiplicity>. These
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1563 values are equivalent.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1564
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1565 =item B<MDLBondStereoToInternalBondType>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1566
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1567 $InternalBondType = MDLBondStereoToInternalBondType($MDLBondStereo);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1568
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1569 Returns value of I<InternalBondType> corresponding to I<MDLBondStereo> using
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1570 mapping shown for B<InternalBondTypeToMDLBondStereo> function.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1571
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1572 =item B<IsCmpd2D>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1573
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1574 $Status = IsCmpd2D();
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1575
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1576 Returns 1 or 0 based on whether z-coordinate of any atom is non-zero.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1577
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1578 =item B<IsCmpd3D>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1579
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1580 $Status = IsCmpd3D();
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1581
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1582 Returns 1 or 0 based on whether z-coordinate of any atom is non-zero.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1583
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1584 =item B<MDLBondStereoToInternalBondStereochemistry>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1585
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1586 $InternalBondStereo = MDLBondStereoToInternalBondStereochemistry(
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1587 $MDLBondStereo);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1588
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1589 Returns value of I<InternalBondStereo> corresponding to I<MDLBondStereo> using
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1590 mapping shown for B<InternalBondStereochemistryToMDLBondStereo> function.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1591
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1592 =item B<MDLBondTypeToInternalBondOrder>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1593
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1594 $InternalBondOrder = MDLBondTypeToInternalBondOrder($MDLBondType);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1595
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1596 Returns value of I<InternalBondOrder> corresponding to I<MDLBondType> using
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1597 mapping shown for B<InternalBondOrderToMDLBondType> function.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1598
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1599 =item B<MDLChargeToInternalCharge>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1600
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1601 $InternalCharge = MDLChargeToInternalCharge($MDLCharge);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1602
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1603 Returns value of I<InternalCharge> corresponding to I<MDLCharge> using
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1604 mapping shown for B<InternalChargeToMDLCharge> function.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1605
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1606 =item B<MDLRadicalToInternalSpinMultiplicity>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1607
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1608 $InternalSpinMultiplicity = MDLRadicalToInternalSpinMultiplicity(
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1609 $MDLRadical);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1610
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1611 Returns value of I<InternalSpinMultiplicity> corresponding to I<MDLRadical>. These
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1612 values are equivalent.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1613
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1614 =item B<ParseCmpdAtomAliasPropertyLine>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1615
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1616 @AtomNumAndValuePairs = ParseCmpdAtomAliasPropertyLine(
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1617 $CurrentLine, $NextLine);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1618
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1619 Parses atom alias property lines in CTAB generic properties block and returns an array
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1620 with successive pairs of values corresponding to atom number and its alias.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1621
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1622 =item B<ParseCmpdAtomLine>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1623
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1624 ($AtomSymbol, $AtomX, $AtomY, $AtomZ, $MassDifference, $Charge,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1625 $StereoParity) = ParseCmpdAtomLine($AtomDataLine);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1626
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1627 Parses compound data line containing atom information and returns a list
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1628 of values.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1629
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1630 =item B<ParseCmpdBondLine>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1631
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1632 ($FirstAtomNum, $SecondAtomNum, $BondType) =
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1633 ParseCmpdBondLine($BondDataLine);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1634
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1635 Parses compound data line containing bond information and returns a list of
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1636 values.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1637
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1638 =item B<ParseCmpdCommentsLine>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1639
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1640 $Comments = ParseCmpdCommentsLine($CommentsDataLine);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1641
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1642 Returns the comment string.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1643
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1644 =item B<ParseCmpdChargePropertyLine>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1645
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1646 @AtomNumAndValuePairs = ParseCmpdChargePropertyLine(
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1647 $ChargeDataLine);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1648
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1649 Parses charge property line in CTAB generic properties block and returns an array
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1650 with successive pairs of values corresponding to atom number and its charge.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1651
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1652 =item B<ParseCmpdCountsLine>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1653
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1654 ($AtomCount, $BondCount, $ChiralFlag, $PropertyCount, $Version) =
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1655 ParseCmpdCountsLine(\@CountDataLines);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1656
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1657 Returns a list of values containing count information.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1658
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1659 =item B<ParseCmpdMiscInfoLine>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1660
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1661 ($UserInitial, $ProgramName, $Date, $Code, $ScalingFactor1, $ScalingFactor2,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1662 $Energy, $RegistryNum) = ParseCmpdMiscInfoLine($Line);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1663
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1664 Returns a list of values containing miscellaneous information.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1665
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1666 =item B<ParseCmpdIsotopePropertyLine>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1667
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1668 @AtomNumAndValuePairs = ParseCmpdIsotopePropertyLine(
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1669 $IsotopeDataLine);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1670
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1671 Parses isotopic property line in CTAB generic properties block and returns an array
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1672 with successive pairs of values corresponding to atom number and absolute mass of
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1673 atom isotope.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1674
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1675 =item B<ParseCmpdMolNameLine>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1676
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1677 $MolName = ParseCmpdMolNameLine($Line);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1678
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1679 Returns a string containing molecule name.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1680
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1681 =item B<ParseCmpdRadicalPropertyLine>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1682
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1683 @AtomNumAndValuePairs = ParseCmpdRadicalPropertyLine(
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1684 $RadicalDataLine);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1685
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1686 Parses radical property line in CTAB generic properties block and returns an array
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1687 with successive pairs of values corresponding to atom number and radical number
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1688 value.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1689
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1690 =item B<RemoveCmpdDataHeaderLabelAndValue>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1691
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1692 $NewCmpdString = RemoveCmpdDataHeaderLabelAndValue($CmpdString,
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1693 $DataHeaderLabel);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1694
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1695 Returns a B<NewCmpdString> after removing I<DataHeaderLabel> along with its
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1696 value from I<CmpdString>.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1697
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1698 =item B<ReadCmpdString>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1699
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1700 $CmpdString = ReadCmpdString(\*SDFILEHANDLE);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1701
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1702 Returns a string containing all the data lines for the next available compound
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1703 in an already open file indicated by SDFILEHANDLE. A NULL string is returned
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1704 on EOF.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1705
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1706 =item B<WashCmpd>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1707
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1708 ($FragmentCount, $Fragments, $WashedCmpdString) =
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1709 WashCmpd(\@CmpdLines);
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1710
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1711 Figures out the number of disconnected fragments and returns their values along
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1712 with the atom numbers in a string delimited by new line character. Fragment data
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1713 in B<FragmentString> is sorted based on its size.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1714
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1715 =back
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1716
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1717 =head1 AUTHOR
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1718
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1719 Manish Sud <msud@san.rr.com>
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1720
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1721 =head1 SEE ALSO
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1722
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1723 TextUtil.pm
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1724
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1725 =head1 COPYRIGHT
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1726
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1727 Copyright (C) 2015 Manish Sud. All rights reserved.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1728
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1729 This file is part of MayaChemTools.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1730
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1731 MayaChemTools is free software; you can redistribute it and/or modify it under
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1732 the terms of the GNU Lesser General Public License as published by the Free
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1733 Software Foundation; either version 3 of the License, or (at your option)
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1734 any later version.
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1735
a4a2ad5a214e Uploaded
deepakjadmin
parents:
diff changeset
1736 =cut