comparison lib/FileIO/SDFileIO.pm @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 package FileIO::SDFileIO;
2 #
3 # $RCSfile: SDFileIO.pm,v $
4 # $Date: 2015/02/28 20:48:43 $
5 # $Revision: 1.35 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use Carp;
31 use Exporter;
32 use Scalar::Util ();
33 use TextUtil ();
34 use FileUtil ();
35 use SDFileUtil ();
36 use FileIO::FileIO;
37 use FileIO::MDLMolFileIO;
38 use Molecule;
39
40 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
41
# Inheritance and export configuration: nothing is exported by default; IsSDFile is
# available on request or through the ':all' tag...
@ISA = ('FileIO::FileIO', 'Exporter');
@EXPORT = ();
@EXPORT_OK = ('IsSDFile');

%EXPORT_TAGS = ('all' => [@EXPORT, @EXPORT_OK]);

# Class level data: the package name, filled in by _InitializeClass and used in
# diagnostics and object type checks...
my $ClassName;
_InitializeClass();
51
# Class constructor: creates the object through the parent class constructor, reblesses it
# into the requested class, and applies SDFileIO defaults followed by the caller supplied
# property names and values...
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Accept either a class name or an existing object as invocant...
  my $Object = $Class->SUPER::new();
  my $TargetClass = ref($Class) || $Class;
  bless $Object, $TargetClass;

  # Defaults first; explicit properties afterwards...
  $Object->_InitializeSDFileIO();
  $Object->_InitializeSDFileIOProperties(%NamesAndValues);

  return $Object;
}
65
# Set up object data local to SDFileIO and return the object...
#
sub _InitializeSDFileIO {
  my $This = shift;

  # Data fields order during output: 'No' keeps the initial order; 'Yes' requests
  # alphabetical output...
  $This->{SortDataFieldsDuringOutput} = 'No';

  return $This;
}
76
# Set up class level data: record the name of this package in $ClassName...
#
sub _InitializeClass {
  $ClassName = __PACKAGE__;
}
83
# Apply caller supplied property names and values to the object and make sure a SD file
# name was specified. Croaks when Name is missing or doesn't look like a SD file...
#
sub _InitializeSDFileIOProperties {
  my($This, %NamesAndValues) = @_;

  # Each pair is handed over to the matching Set<PropertyName> method; those methods
  # are implemented on-demand using ObjectProperty class...
  while (my($PropertyName, $PropertyValue) = each %NamesAndValues) {
    my $Setter = "Set${PropertyName}";
    $This->$Setter($PropertyValue);
  }

  # File name is mandatory...
  exists $NamesAndValues{Name}
    or croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name...";

  # ... and must refer to a SD file...
  my $Name = $NamesAndValues{Name};
  $This->IsSDFile($Name)
    or croak "Error: ${ClassName}->New: Object can't be instantiated: File, $Name, doesn't appear to be SDF format...";

  return $This;
}
109
# Check whether a file name refers to a SD file. Usable as an object method,
# $SDFileIO->IsSDFile($FileName), or as a function, IsSDFile($FileName). The
# result of FileUtil::CheckFileType for the "sd" and "sdf" extensions is returned...
#
sub IsSDFile ($;$) {
  my($FirstParameter, $SecondParameter) = @_;

  # Method invocation puts the object first and the file name second...
  my $CalledAsMethod = ((@_ == 2) && (_IsSDFileIO($FirstParameter)));
  my $FileName = $CalledAsMethod ? $SecondParameter : $FirstParameter;

  # Check file extension...
  my $Status = FileUtil::CheckFileType($FileName, "sd sdf");

  return $Status;
}
127
# Read the next compound from the file handle of this object and return whatever
# ParseMoleculeString makes out of it...
#
sub ReadMolecule {
  my $This = shift;
  my $FileHandle = $This->GetFileHandle();

  return $This->ParseMoleculeString(SDFileUtil::ReadCmpdString($FileHandle));
}
136
# Write compound data along with any data field labels and values using Molecule object.
#
# Parameters:
#   $Molecule - object to write; it must be a blessed object whose IsMolecule method
#               returns true.
#
# Returns the SDFileIO object. When $Molecule is missing, is not a blessed object, or
# is rejected by IsMolecule, a warning is issued and nothing is written.
#
sub WriteMolecule {
  my($This, $Molecule) = @_;

  # Check for a blessed object before invoking the method: calling IsMolecule on a
  # defined but unblessed reference would die instead of producing the warning below...
  if (!(Scalar::Util::blessed($Molecule) && $Molecule->IsMolecule())) {
    carp "Warning: ${ClassName}->WriteMolecule: No data written: Molecule object is not specified...";
    return $This;
  }

  my $FileHandle = $This->GetFileHandle();

  # One complete SD record followed by a new line...
  print {$FileHandle} $This->GenerateMoleculeString($Molecule) . "\n";

  return $This;
}
152
# Retrieve the text of the next compound from the file handle of this object without
# parsing it...
#
sub ReadMoleculeString {
  my $This = shift;
  my $FileHandle = $This->GetFileHandle();

  return SDFileUtil::ReadCmpdString($FileHandle);
}
161
# Parse molecule string and return molecule object. Two invocation styles are recognized:
# object method, $SDFileIO->ParseMoleculeString($MoleculeString), or package function,
# FileIO::SDFileIO::ParseMoleculeString($MoleculeString). An empty molecule string
# results in undef...
#
sub ParseMoleculeString {
  my($FirstParameter, $SecondParameter) = @_;

  # Method invocation puts the object first and the molecule string second...
  my $CalledAsMethod = ((@_ == 2) && (_IsSDFileIO($FirstParameter)));
  my $MoleculeString = $CalledAsMethod ? $SecondParameter : $FirstParameter;

  return undef unless $MoleculeString;

  # Structure data is handled by the MDL MOL file parser...
  my $Molecule = FileIO::MDLMolFileIO::ParseMoleculeString($MoleculeString);

  # Data label/value pairs are collected from the individual lines...
  my @MoleculeLines = split /\n/, $MoleculeString;
  my @DataLabels = SDFileUtil::GetCmpdDataHeaderLabels(\@MoleculeLines);
  my %DataLabelsAndValues = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@MoleculeLines);

  # Labels array keeps track of the initial order of data fields in SD file...
  $Molecule->SetDataFieldLabels(\@DataLabels);

  # Label/value pairs hash is attached to the molecule as a generic property...
  $Molecule->SetDataFieldLabelAndValues(\%DataLabelsAndValues);

  return $Molecule;
}
199
# Generate a SD record string for a molecule object: CTAB data, any data field labels
# and values, and the terminating $$$$ line. Usable as an object method or as a package
# function; an undefined molecule results in undef...
#
sub GenerateMoleculeString {
  my($FirstParameter, $SecondParameter) = @_;

  # Method invocation puts the object first and the molecule second; function
  # invocation has no object...
  my($This, $Molecule) = ((@_ == 2) && (_IsSDFileIO($FirstParameter)))
    ? ($FirstParameter, $SecondParameter)
    : (undef, $FirstParameter);

  return undef unless defined($Molecule);

  # CTAB data comes from the MDL MOL file generator...
  my $CmpdString = FileIO::MDLMolFileIO::GenerateMoleculeString($Molecule);

  # Data field labels and values, if the molecule carries any...
  my $DataFieldLabelsAndValuesString = '';
  if ($Molecule->HasProperty('DataFieldLabels')) {
    # Alphabetical output only on an explicit 'Yes' setting of the object...
    my $SortDataFields = (exists($This->{SortDataFieldsDuringOutput}) && $This->{SortDataFieldsDuringOutput} =~ /^Yes$/i) ? 1 : 0;

    my $LabelsRef = $Molecule->GetDataFieldLabels();
    my $LabelAndValuesRef = $Molecule->GetDataFieldLabelAndValues();
    my @DataLines = SDFileUtil::GenerateCmpdDataHeaderLabelsAndValuesLines($LabelsRef, $LabelAndValuesRef, $SortDataFields);

    $DataFieldLabelsAndValuesString = join "\n", @DataLines;
  }

  return "${CmpdString}\n${DataFieldLabelsAndValuesString}\n\$\$\$\$";
}
235
236
# Return 1 when the argument is a blessed object derived from this class and 0
# otherwise...
#
sub _IsSDFileIO {
  my($Object) = @_;

  # Anything which is not a blessed reference can't be a SDFileIO object...
  return 0 unless Scalar::Util::blessed($Object);

  return $Object->isa($ClassName) ? 1 : 0;
}
243
244 1;
245
246 __END__
247
248 =head1 NAME
249
250 SDFileIO
251
252 =head1 SYNOPSIS
253
254 use FileIO::SDFileIO;
255
256 use FileIO::SDFileIO qw(:all);
257
258 =head1 DESCRIPTION
259
260 B<SDFileIO> class provides the following methods:
261
262 new, GenerateMoleculeString, IsSDFile, ParseMoleculeString, ReadMolecule,
263 ReadMoleculeString, WriteMolecule
264
265 The following methods can also be used as functions:
266
267 GenerateMoleculeString, IsSDFile, ParseMoleculeString
268
269 Data specific to B<SDFileIO> class not directly used by B<Molecule>, B<Atom> and
270 B<Bond> objects - data label/value pairs, atom StereoParity and so on - is associated with
271 and retrieved from appropriate objects using the following methods:
272
273 SetMDL<PropertyName>
274 GetMDL<PropertyName>.
275
276 SD data labels and values are attached to B<Molecule> object as a reference to a hash
277 using SetDataFieldLabelAndValues and can be retrieved using GetDataFieldLabelAndValues
278 method.
279
280 B<SDFileIO> class is derived from I<FileIO> class and uses its methods to support
281 generic file related functionality.
282
283 =head2 METHODS
284
285 =over 4
286
287 =item B<new>
288
289 $NewSDFileIO = new FileIO::SDFileIO(%NamesAndValues);
290
291 Using specified I<SDFileIO> property names and values hash, B<new> method creates a new object
292 and returns a reference to newly created B<SDFileIO> object.
293
294 =item B<GenerateMoleculeString>
295
296 $MoleculeString = $SDFileIO->GenerateMoleculeString($Molecule);
297 $MoleculeString = FileIO::SDFileIO::GenerateMoleculeString($Molecule);
298
299 Returns a B<MoleculeString> in SD format corresponding to I<Molecule>.
300
301 =item B<IsSDFile>
302
303 $Status = $SDFileIO->IsSDFile($FileName);
304 $Status = FileIO::SDFileIO::IsSDFile($FileName);
305
306 Returns 1 or 0 based on whether I<FileName> is a SD file.
307
308 =item B<ParseMoleculeString>
309
310 $Molecule = $SDFileIO->ParseMoleculeString($MoleculeString);
311 $Molecule = FileIO::SDFileIO::ParseMoleculeString($MoleculeString);
312
313 Parses I<MoleculeString> and returns a B<Molecule> object. SD data field label and value pairs
314 are associated to B<Molecule> object as a reference to a hash using:
315
316 $Molecule->SetDataFieldLabelAndValues(\%DataLabelsAndValues)
317
318 The reference to hash can be retrieved by:
319
320 $DataLabelsAndValues = $Molecule->GetDataFieldLabelAndValues();
321 for $DataLabel (sort keys %{$DataLabelsAndValues}) {
322 $DataValue = $DataLabelsAndValues->{$DataLabel};
323 }
324
325 =item B<ReadMolecule>
326
327 $Molecule = $SDFileIO->ReadMolecule();
328
329 Reads data for the next compound from the file handle associated with the B<SDFileIO>
330 object, creates, and returns a B<Molecule> object.
331
332 =item B<ReadMoleculeString>
333
334 $MoleculeString = $SDFileIO->ReadMoleculeString();
335
336 Reads data for the next compound from the file handle associated with the B<SDFileIO>
337 object and returns a B<MoleculeString> corresponding to compound structure and other
338 associated data.
339
340 =item B<WriteMolecule>
341
342 $SDFileIO->WriteMolecule($Molecule);
343
344 Writes I<Molecule> data, along with any data field labels and values, to a file in SD format and returns B<SDFileIO>.
345
346 =back
347
348 =head1 AUTHOR
349
350 Manish Sud <msud@san.rr.com>
351
352 =head1 SEE ALSO
353
354 MoleculeFileIO.pm, MDLMolFileIO.pm
355
356 =head1 COPYRIGHT
357
358 Copyright (C) 2015 Manish Sud. All rights reserved.
359
360 This file is part of MayaChemTools.
361
362 MayaChemTools is free software; you can redistribute it and/or modify it under
363 the terms of the GNU Lesser General Public License as published by the Free
364 Software Foundation; either version 3 of the License, or (at your option)
365 any later version.
366
367 =cut