0
|
1 package FileIO::SDFileIO;
|
|
2 #
|
|
3 # $RCSfile: SDFileIO.pm,v $
|
|
4 # $Date: 2015/02/28 20:48:43 $
|
|
5 # $Revision: 1.35 $
|
|
6 #
|
|
7 # Author: Manish Sud <msud@san.rr.com>
|
|
8 #
|
|
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
10 #
|
|
11 # This file is part of MayaChemTools.
|
|
12 #
|
|
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
14 # the terms of the GNU Lesser General Public License as published by the Free
|
|
15 # Software Foundation; either version 3 of the License, or (at your option) any
|
|
16 # later version.
|
|
17 #
|
|
18 # MayaChemTools is distributed in the hope that it will be useful, but without
|
|
19 # any warranty; without even the implied warranty of merchantability of fitness
|
|
20 # for a particular purpose. See the GNU Lesser General Public License for more
|
|
21 # details.
|
|
22 #
|
|
23 # You should have received a copy of the GNU Lesser General Public License
|
|
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
|
|
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
|
|
26 # Boston, MA, 02111-1307, USA.
|
|
27 #
|
|
28
|
|
29 use strict;
|
|
30 use Carp;
|
|
31 use Exporter;
|
|
32 use Scalar::Util ();
|
|
33 use TextUtil ();
|
|
34 use FileUtil ();
|
|
35 use SDFileUtil ();
|
|
36 use FileIO::FileIO;
|
|
37 use FileIO::MDLMolFileIO;
|
|
38 use Molecule;
|
|
39
|
|
# Package globals used for inheritance and by Exporter...
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

# Parent classes, searched in this order during method resolution...
@ISA = ('FileIO::FileIO', 'Exporter');

# Nothing is exported by default; IsSDFile may be imported on request...
@EXPORT = ();
@EXPORT_OK = ('IsSDFile');

# The 'all' tag bundles every exportable name...
%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# File-scoped class name shared by the subs below; filled in by _InitializeClass...
my $ClassName;
_InitializeClass();
|
|
51
|
|
52 # Class constructor...
|
|
# Create a new SDFileIO object.
#
# Parameters: class name (or an existing object) followed by property name/value pairs.
# Returns: the newly created object after its defaults and the specified properties
# have been applied.
#
sub new {
    my $Class = shift;
    my %NamesAndValues = @_;

    # Let the parent class build the object, then rebless it into the requested class...
    my $NewObject = $Class->SUPER::new();
    bless $NewObject, (ref($Class) || $Class);

    # Defaults first, so that caller specified properties can override them...
    $NewObject->_InitializeSDFileIO();
    $NewObject->_InitializeSDFileIOProperties(%NamesAndValues);

    return $NewObject;
}
|
|
65
|
|
66 # Initialize any local object data...
|
|
67 #
|
|
# Set up default values for object data specific to SDFileIO.
#
# Parameters: the object (hash reference) to initialize.
# Returns: the same object.
#
sub _InitializeSDFileIO {
    my $Object = shift;

    # By default, data fields are not sorted during output; GenerateMoleculeString
    # treats a value matching 'Yes' (case insensitive) as a request to sort them...
    $Object->{SortDataFieldsDuringOutput} = 'No';

    return $Object;
}
|
|
76
|
|
77 # Initialize class ...
|
|
# Initialize class level data: store the name of this package in the file-scoped
# $ClassName variable, which other subs in this file use in messages and in isa checks...
#
sub _InitializeClass {
    $ClassName = __PACKAGE__;
}
|
|
83
|
|
84 # Initialize object values...
|
|
# Apply caller specified property names and values to the object and validate the file name.
#
# Parameters: the object followed by property name/value pairs.
# Returns: the object.
# Croaks: when no Name property is specified, or when IsSDFile rejects the specified name.
#
sub _InitializeSDFileIOProperties {
    my $This = shift;
    my %NamesAndValues = @_;

    # Each property is assigned through its Set<PropertyName> method...
    for my $PropertyName (keys %NamesAndValues) {
        my $SetterName = "Set${PropertyName}";
        $This->$SetterName($NamesAndValues{$PropertyName});
    }

    # A file name is mandatory...
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name..."
        unless exists $NamesAndValues{Name};

    # ... and it must be accepted by IsSDFile...
    my $Name = $NamesAndValues{Name};
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $Name, doesn't appear to be SDF format..."
        unless $This->IsSDFile($Name);

    return $This;
}
|
|
109
|
|
110 # Is it a SD file?
|
|
# Check whether a file name corresponds to a SD file. Supports invocation as an object
# method or as a package function.
#
# Parameters: the file name, optionally preceded by a SDFileIO object.
# Returns: the status reported by FileUtil::CheckFileType for the extensions "sd sdf".
#
sub IsSDFile ($;$) {
    my @Arguments = @_;

    # Skip over the object when called as a method with exactly one argument...
    my $FileName = $Arguments[0];
    if (@Arguments == 2 && _IsSDFileIO($Arguments[0])) {
        $FileName = $Arguments[1];
    }

    my $Status = FileUtil::CheckFileType($FileName, "sd sdf");

    return $Status;
}
|
|
127
|
|
128 # Read molecule from file and return molecule object...
|
|
# Read data for the next compound from the file and return it as a molecule object.
#
# Parameters: the SDFileIO object.
# Returns: whatever ParseMoleculeString returns for the compound string that was read;
# ParseMoleculeString returns undef for an empty or missing string.
#
sub ReadMolecule {
    my($This) = @_;

    my $FileHandle = $This->GetFileHandle();

    # Capture the compound string in a scalar so that exactly one value is forwarded.
    # Calling the reader directly inside the argument list evaluates it in list context;
    # an empty list there would leave ParseMoleculeString with the object as its only
    # argument, which it would then treat as the molecule string...
    my $MoleculeString = SDFileUtil::ReadCmpdString($FileHandle);

    return $This->ParseMoleculeString($MoleculeString);
}
|
|
136
|
|
137 # Write compound data along with any data field label and values using Molecule object...
|
|
# Write compound data, along with any data field labels and values, using a molecule object.
#
# Parameters: the SDFileIO object and the molecule object to write.
# Returns: the SDFileIO object, whether or not anything was written.
# Warns (carp): when the second argument is not an object whose IsMolecule method
# returns true, or when the print to the file handle fails.
#
sub WriteMolecule {
    my($This, $Molecule) = @_;

    # Check for an object before calling a method on it, so that a defined non-object
    # value (for example, a plain string or an unblessed reference) produces the warning
    # below instead of a fatal method resolution error...
    if (!(Scalar::Util::blessed($Molecule) && $Molecule->IsMolecule())) {
        carp "Warning: ${ClassName}->WriteMolecule: No data written: Molecule object is not specified...";
        return $This;
    }

    my $FileHandle = $This->GetFileHandle();

    # Each record is followed by a newline. A failed print is reported rather than
    # silently ignored...
    print {$FileHandle} $This->GenerateMoleculeString($Molecule) . "\n"
        or carp "Warning: ${ClassName}->WriteMolecule: Couldn't write molecule data: $!";

    return $This;
}
|
|
152
|
|
153 # Retrieve molecule string...
|
|
# Retrieve the string for the next compound from the file.
#
# Parameters: the SDFileIO object.
# Returns: whatever SDFileUtil::ReadCmpdString returns for the object's file handle.
#
sub ReadMoleculeString {
    my $This = shift;

    my $Handle = $This->GetFileHandle();

    return SDFileUtil::ReadCmpdString($Handle);
}
|
|
161
|
|
162 # Parse molecule string and return molecule object. ParseMoleculeString supports two invocation methods: class
|
|
163 # method or a package function.
|
|
164 #
|
|
# Parse a molecule string and return a molecule object. Supports invocation as an object
# method or as a package function.
#
# Parameters: the molecule string, optionally preceded by a SDFileIO object.
# Returns: the molecule object with data field labels and label/value pairs attached,
# or undef when the molecule string is empty or otherwise false.
#
sub ParseMoleculeString {
    my @Arguments = @_;

    # For method calls the object comes first; it plays no part in the parsing...
    my $MoleculeString = (@Arguments == 2 && _IsSDFileIO($Arguments[0])) ? $Arguments[1] : $Arguments[0];
    return undef unless $MoleculeString;

    # Structure data is handled by the MDLMol parser...
    my $Molecule = FileIO::MDLMolFileIO::ParseMoleculeString($MoleculeString);

    # Data field labels and values are extracted from the individual lines...
    my @MoleculeLines = split /\n/, $MoleculeString;
    my @DataLabels = SDFileUtil::GetCmpdDataHeaderLabels(\@MoleculeLines);
    my %DataLabelsAndValues = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@MoleculeLines);

    # The labels array keeps track of the initial order of data fields; the hash holds
    # the label/value pairs. Both are attached to the molecule as references...
    $Molecule->SetDataFieldLabels(\@DataLabels);
    $Molecule->SetDataFieldLabelAndValues(\%DataLabelsAndValues);

    return $Molecule;
}
|
|
199
|
|
200 # Generate molecule string using molecule object...
|
|
# Generate a SD format string for a molecule object. Supports invocation as an object
# method or as a package function.
#
# Parameters: the molecule object, optionally preceded by a SDFileIO object.
# Returns: the structure string, a newline, any data field lines joined by newlines,
# a newline and the '$$$$' record terminator; or undef when no molecule is specified.
#
sub GenerateMoleculeString {
    my @Arguments = @_;

    my($Writer, $Molecule);
    if (@Arguments == 2 && _IsSDFileIO($Arguments[0])) {
        ($Writer, $Molecule) = @Arguments;
    }
    else {
        $Molecule = $Arguments[0];
    }
    return undef unless defined $Molecule;

    # Structure data is generated by the MDLMol writer...
    my $StructureString = FileIO::MDLMolFileIO::GenerateMoleculeString($Molecule);

    # Data field labels and values, if the molecule carries any...
    my $DataFieldsString = '';
    if ($Molecule->HasProperty('DataFieldLabels')) {
        # Sorting is requested only through the object used for method style calls...
        my $SortDataFields = 0;
        if (defined($Writer) && exists($Writer->{SortDataFieldsDuringOutput}) && $Writer->{SortDataFieldsDuringOutput} =~ /^Yes$/i) {
            $SortDataFields = 1;
        }

        my $LabelsRef = $Molecule->GetDataFieldLabels();
        my $LabelAndValuesRef = $Molecule->GetDataFieldLabelAndValues();
        my @DataFieldLines = SDFileUtil::GenerateCmpdDataHeaderLabelsAndValuesLines($LabelsRef, $LabelAndValuesRef, $SortDataFields);

        $DataFieldsString = join "\n", @DataFieldLines;
    }

    return $StructureString . "\n" . $DataFieldsString . "\n\$\$\$\$";
}
|
|
235
|
|
236
|
|
237 # Is it a SDFileIO object?
|
|
# Check whether a value is a SDFileIO object.
#
# Parameters: the value to check.
# Returns: 1 when the value is a blessed reference whose class is, or derives from,
# the class named by $ClassName; 0 otherwise.
#
sub _IsSDFileIO {
    my $Candidate = shift;

    # Anything that is not an object can't be a SDFileIO...
    return 0 unless Scalar::Util::blessed($Candidate);

    return $Candidate->isa($ClassName) ? 1 : 0;
}
|
|
243
|
|
244 1;
|
|
245
|
|
246 __END__
|
|
247
|
|
248 =head1 NAME
|
|
249
|
|
250 SDFileIO
|
|
251
|
|
252 =head1 SYNOPSIS
|
|
253
|
|
254 use FileIO::SDFileIO;
|
|
255
|
|
256 use FileIO::SDFileIO qw(:all);
|
|
257
|
|
258 =head1 DESCRIPTION
|
|
259
|
|
260 B<SDFileIO> class provides the following methods:
|
|
261
|
|
262 new, GenerateMoleculeString, IsSDFile, ParseMoleculeString, ReadMolecule,
|
|
263 ReadMoleculeString, WriteMolecule
|
|
264
|
|
265 The following methods can also be used as functions:
|
|
266
|
|
267 GenerateMoleculeString, IsSDFile, ParseMoleculeString
|
|
268
|
|
269 Data specific to B<SDFileIO> class not directly used by B<Molecule>, B<Atom> and
|
|
270 B<Bond> objects - data label/value pairs, atom StereoParity and so on - is associated to
|
|
271 and retrieved from appropriate objects using following methods:
|
|
272
|
|
273 SetMDL<PropertyName>
|
|
274 GetMDL<PropertyName>.
|
|
275
|
|
276 SD data label and values are attached to B<Molecule> object as a reference to a hash
|
|
277 using SetDataFieldLabelAndValues and can be retrieved using GetDataFieldLabelAndValues
|
|
278 method.
|
|
279
|
|
280 B<SDFileIO> class is derived from I<FileIO> class and uses its methods to support
|
|
281 generic file related functionality.
|
|
282
|
|
283 =head2 METHODS
|
|
284
|
|
285 =over 4
|
|
286
|
|
287 =item B<new>
|
|
288
|
|
289 $NewSDFileIO = new FileIO::SDFileIO(%NamesAndValues);
|
|
290
|
|
291 Using specified I<SDFileIO> property names and values hash, B<new> method creates a new object
|
|
292 and returns a reference to newly created B<SDFileIO> object.
|
|
293
|
|
294 =item B<GenerateMoleculeString>
|
|
295
|
|
296 $MoleculeString = $SDFileIO->GenerateMoleculeString($Molecule);
|
|
297 $MoleculeString = FileIO::SDFileIO::GenerateMoleculeString($Molecule);
|
|
298
|
|
299 Returns a B<MoleculeString> in SD format corresponding to I<Molecule>.
|
|
300
|
|
301 =item B<IsSDFile>
|
|
302
|
|
303 $Status = $SDFileIO->IsSDFile($FileName);
|
|
304 $Status = FileIO::SDFileIO::IsSDFile($FileName);
|
|
305
|
|
306 Returns 1 or 0 based on whether I<FileName> is a SD file.
|
|
307
|
|
308 =item B<ParseMoleculeString>
|
|
309
|
|
310 $Molecule = $SDFileIO->ParseMoleculeString($MoleculeString);
|
|
311 $Molecule = FileIO::SDFileIO::ParseMoleculeString($MoleculeString);
|
|
312
|
|
313 Parses I<MoleculeString> and returns a B<Molecule> object. SD data field label and value pairs
|
|
314 are associated to B<Molecule> object as a reference to a hash using:
|
|
315
|
|
316 $Molecule->SetDataFieldLabelAndValues(\%DataLabelsAndValues)
|
|
317
|
|
318 The reference to hash can be retrieved by:
|
|
319
|
|
320 $DataLabelsAndValues = $Molecule->GetDataFieldLabelAndValues();
|
|
321 for $DataLabel (sort keys %{$DataLabelsAndValues}) {
|
|
322 $DataValue = $DataLabelsAndValues->{$DataLabel};
|
|
323 }
|
|
324
|
|
325 =item B<ReadMolecule>
|
|
326
|
|
327 $Molecule = $SDFileIO->ReadMolecule();
|
|
328
|
|
329 Reads data for the next compound using the file handle returned by the object's
|
|
330 GetFileHandle method, creates, and returns a B<Molecule> object.
|
|
331
|
|
332 =item B<ReadMoleculeString>
|
|
333
|
|
334 $MoleculeString = $SDFileIO->ReadMoleculeString();
|
|
335
|
|
336 Reads data for the next compound using the file handle returned by the object's
|
|
337 GetFileHandle method and returns a B<MoleculeString> corresponding to compound
|
|
338 structure and other associated data.
|
|
339
|
|
340 =item B<WriteMolecule>
|
|
341
|
|
342 $SDFileIO->WriteMolecule($Molecule);
|
|
343
|
|
344 Writes I<Molecule> data to a file in SD format and returns B<SDFileIO>.
|
|
345
|
|
346 =back
|
|
347
|
|
348 =head1 AUTHOR
|
|
349
|
|
350 Manish Sud <msud@san.rr.com>
|
|
351
|
|
352 =head1 SEE ALSO
|
|
353
|
|
354 MoleculeFileIO.pm, MDLMolFileIO.pm
|
|
355
|
|
356 =head1 COPYRIGHT
|
|
357
|
|
358 Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
359
|
|
360 This file is part of MayaChemTools.
|
|
361
|
|
362 MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
363 the terms of the GNU Lesser General Public License as published by the Free
|
|
364 Software Foundation; either version 3 of the License, or (at your option)
|
|
365 any later version.
|
|
366
|
|
367 =cut
|