Mercurial > repos > deepakjadmin > mayatool3_test2
comparison lib/FileIO/SDFileIO.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4816e4a8ae95 |
---|---|
1 package FileIO::SDFileIO; | |
2 # | |
3 # $RCSfile: SDFileIO.pm,v $ | |
4 # $Date: 2015/02/28 20:48:43 $ | |
5 # $Revision: 1.35 $ | |
6 # | |
7 # Author: Manish Sud <msud@san.rr.com> | |
8 # | |
9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
10 # | |
11 # This file is part of MayaChemTools. | |
12 # | |
13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
14 # the terms of the GNU Lesser General Public License as published by the Free | |
15 # Software Foundation; either version 3 of the License, or (at your option) any | |
16 # later version. | |
17 # | |
18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
19 # any warranty; without even the implied warranty of merchantability or fitness | |
20 # for a particular purpose. See the GNU Lesser General Public License for more | |
21 # details. | |
22 # | |
23 # You should have received a copy of the GNU Lesser General Public License | |
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
26 # Boston, MA, 02111-1307, USA. | |
27 # | |
28 | |
29 use strict; | |
30 use Carp; | |
31 use Exporter; | |
32 use Scalar::Util (); | |
33 use TextUtil (); | |
34 use FileUtil (); | |
35 use SDFileUtil (); | |
36 use FileIO::FileIO; | |
37 use FileIO::MDLMolFileIO; | |
38 use Molecule; | |
39 | |
40 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); | |
41 | |
42 @ISA = qw(FileIO::FileIO Exporter); | |
43 @EXPORT = qw(); | |
44 @EXPORT_OK = qw(IsSDFile); | |
45 | |
46 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); | |
47 | |
48 # Setup class variables... | |
49 my($ClassName); | |
50 _InitializeClass(); | |
51 | |
52 # Class constructor... | |
53 sub new { | |
54 my($Class, %NamesAndValues) = @_; | |
55 | |
56 # Initialize object... | |
57 my $This = $Class->SUPER::new(); | |
58 bless $This, ref($Class) || $Class; | |
59 $This->_InitializeSDFileIO(); | |
60 | |
61 $This->_InitializeSDFileIOProperties(%NamesAndValues); | |
62 | |
63 return $This; | |
64 } | |
65 | |
66 # Initialize any local object data... | |
67 # | |
68 sub _InitializeSDFileIO { | |
69 my($This) = @_; | |
70 | |
71 # Sorting of MDL data fields during output: Keep the initial order or write 'em out alphabetically... | |
72 $This->{SortDataFieldsDuringOutput} = 'No'; | |
73 | |
74 return $This; | |
75 } | |
76 | |
77 # Initialize class ... | |
78 sub _InitializeClass { | |
79 #Class name... | |
80 $ClassName = __PACKAGE__; | |
81 | |
82 } | |
83 | |
84 # Initialize object values... | |
85 sub _InitializeSDFileIOProperties { | |
86 my($This, %NamesAndValues) = @_; | |
87 | |
88 # All other property names and values along with all Set/Get<PropertyName> methods | |
89 # are implemented on-demand using ObjectProperty class. | |
90 | |
91 my($Name, $Value, $MethodName); | |
92 while (($Name, $Value) = each %NamesAndValues) { | |
93 $MethodName = "Set${Name}"; | |
94 $This->$MethodName($Value); | |
95 } | |
96 | |
97 if (!exists $NamesAndValues{Name}) { | |
98 croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name..."; | |
99 } | |
100 | |
101 # Make sure it's a SD file... | |
102 $Name = $NamesAndValues{Name}; | |
103 if (!$This->IsSDFile($Name)) { | |
104 croak "Error: ${ClassName}->New: Object can't be instantiated: File, $Name, doesn't appear to be SDF format..."; | |
105 } | |
106 | |
107 return $This; | |
108 } | |
109 | |
110 # Is it a SD file? | |
111 sub IsSDFile ($;$) { | |
112 my($FirstParameter, $SecondParameter) = @_; | |
113 my($This, $FileName, $Status); | |
114 | |
115 if ((@_ == 2) && (_IsSDFileIO($FirstParameter))) { | |
116 ($This, $FileName) = ($FirstParameter, $SecondParameter); | |
117 } | |
118 else { | |
119 $FileName = $FirstParameter; | |
120 } | |
121 | |
122 # Check file extension... | |
123 $Status = FileUtil::CheckFileType($FileName, "sd sdf"); | |
124 | |
125 return $Status; | |
126 } | |
127 | |
128 # Read molecule from file and return molecule object... | |
129 sub ReadMolecule { | |
130 my($This) = @_; | |
131 my($FileHandle); | |
132 | |
133 $FileHandle = $This->GetFileHandle(); | |
134 return $This->ParseMoleculeString(SDFileUtil::ReadCmpdString($FileHandle)); | |
135 } | |
136 | |
137 # Write compound data along with any data field label and values using Molecule object... | |
138 sub WriteMolecule { | |
139 my($This, $Molecule) = @_; | |
140 | |
141 if (!(defined($Molecule) && $Molecule->IsMolecule())) { | |
142 carp "Warning: ${ClassName}->WriteMolecule: No data written: Molecule object is not specified..."; | |
143 return $This; | |
144 } | |
145 my($FileHandle); | |
146 $FileHandle = $This->GetFileHandle(); | |
147 | |
148 print $FileHandle $This->GenerateMoleculeString($Molecule) . "\n"; | |
149 | |
150 return $This; | |
151 } | |
152 | |
153 # Retrieve molecule string... | |
154 sub ReadMoleculeString { | |
155 my($This) = @_; | |
156 my($FileHandle); | |
157 | |
158 $FileHandle = $This->GetFileHandle(); | |
159 return SDFileUtil::ReadCmpdString($FileHandle); | |
160 } | |
161 | |
162 # Parse molecule string and return molecule object. ParseMoleculeString supports two invocation methods: class | |
163 # method or a package function. | |
164 # | |
165 sub ParseMoleculeString { | |
166 my($FirstParameter, $SecondParameter) = @_; | |
167 my($This, $MoleculeString); | |
168 | |
169 if ((@_ == 2) && (_IsSDFileIO($FirstParameter))) { | |
170 ($This, $MoleculeString) = ($FirstParameter, $SecondParameter); | |
171 } | |
172 else { | |
173 $MoleculeString = $FirstParameter; | |
174 $This = undef; | |
175 } | |
176 if (!$MoleculeString) { | |
177 return undef; | |
178 } | |
179 # Parse molecule data... | |
180 my($Molecule); | |
181 $Molecule = FileIO::MDLMolFileIO::ParseMoleculeString($MoleculeString); | |
182 | |
183 # Process data label/value pairs... | |
184 my(@MoleculeLines, @DataLabels, %DataLabelsAndValues); | |
185 | |
186 %DataLabelsAndValues = (); | |
187 @MoleculeLines = split /\n/, $MoleculeString; | |
188 @DataLabels = SDFileUtil::GetCmpdDataHeaderLabels(\@MoleculeLines); | |
189 %DataLabelsAndValues = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@MoleculeLines); | |
190 | |
191 # Store reference to data labels to keep track of their initial order in SD file... | |
192 $Molecule->SetDataFieldLabels(\@DataLabels); | |
193 | |
194 # Store reference to SD data label/value pairs hash as a generic property of molecule... | |
195 $Molecule->SetDataFieldLabelAndValues(\%DataLabelsAndValues); | |
196 | |
197 return $Molecule; | |
198 } | |
199 | |
200 # Generate molecule string using molecule object... | |
201 sub GenerateMoleculeString { | |
202 my($FirstParameter, $SecondParameter) = @_; | |
203 my($This, $Molecule); | |
204 | |
205 if ((@_ == 2) && (_IsSDFileIO($FirstParameter))) { | |
206 ($This, $Molecule) = ($FirstParameter, $SecondParameter); | |
207 } | |
208 else { | |
209 $Molecule = $FirstParameter; | |
210 $This = undef; | |
211 } | |
212 if (!defined($Molecule)) { | |
213 return undef; | |
214 } | |
215 # Generate CTAB data... | |
216 my($CmpdString); | |
217 $CmpdString = FileIO::MDLMolFileIO::GenerateMoleculeString($Molecule); | |
218 | |
219 # Generate any data field labels and values... | |
220 my($DataFieldLabelsAndValuesString); | |
221 | |
222 $DataFieldLabelsAndValuesString = ''; | |
223 if ($Molecule->HasProperty('DataFieldLabels')) { | |
224 my($DataFieldLabelsRef, $DataFieldLabelAndValuesRef, $SortDataFields); | |
225 | |
226 $SortDataFields = (exists($This->{SortDataFieldsDuringOutput}) && $This->{SortDataFieldsDuringOutput} =~ /^Yes$/i) ? 1 : 0; | |
227 | |
228 $DataFieldLabelsRef = $Molecule->GetDataFieldLabels(); | |
229 $DataFieldLabelAndValuesRef = $Molecule->GetDataFieldLabelAndValues(); | |
230 $DataFieldLabelsAndValuesString = join "\n", SDFileUtil::GenerateCmpdDataHeaderLabelsAndValuesLines($DataFieldLabelsRef, $DataFieldLabelAndValuesRef, $SortDataFields); | |
231 } | |
232 | |
233 return "${CmpdString }\n${DataFieldLabelsAndValuesString}\n\$\$\$\$"; | |
234 } | |
235 | |
236 | |
237 # Is it a SDFileIO object? | |
238 sub _IsSDFileIO { | |
239 my($Object) = @_; | |
240 | |
241 return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0; | |
242 } | |
243 | |
244 1; | |
245 | |
246 __END__ | |
247 | |
248 =head1 NAME | |
249 | |
250 SDFileIO | |
251 | |
252 =head1 SYNOPSIS | |
253 | |
254 use FileIO::SDFileIO; | |
255 | |
256 use FileIO::SDFileIO qw(:all); | |
257 | |
258 =head1 DESCRIPTION | |
259 | |
260 B<SDFileIO> class provides the following methods: | |
261 | |
262 new, GenerateMoleculeString, IsSDFile, ParseMoleculeString, ReadMolecule, | |
263 ReadMoleculeString, WriteMolecule | |
264 | |
265 The following methods can also be used as functions: | |
266 | |
267 GenerateMoleculeString, IsSDFile, ParseMoleculeString | |
268 | |
269 Data specific to B<SDFileIO> class not directly used by B<Molecule>, B<Atom> and | |
270 B<Bond> objects - data label/value pairs, atom StereoParity and so on - is associated with | |
271 and retrieved from appropriate objects using the following methods: | |
272 | |
273 SetMDL<PropertyName> | |
274 GetMDL<PropertyName>. | |
275 | |
276 SD data labels and values are attached to B<Molecule> object as a reference to a hash | |
277 using SetDataFieldLabelAndValues and can be retrieved using GetDataFieldLabelAndValues | |
278 method. | |
279 | |
280 B<SDFileIO> class is derived from I<FileIO> class and uses its methods to support | |
281 generic file related functionality. | |
282 | |
283 =head2 METHODS | |
284 | |
285 =over 4 | |
286 | |
287 =item B<new> | |
288 | |
289 $NewSDFileIO = new FileIO::SDFileIO(%NamesAndValues); | |
290 | |
291 Using specified I<SDFileIO> property names and values hash, B<new> method creates a new object | |
292 and returns a reference to newly created B<SDFileIO> object. | |
293 | |
294 =item B<GenerateMoleculeString> | |
295 | |
296 $MoleculeString = $SDFileIO->GenerateMoleculeString($Molecule); | |
297 $MoleculeString = FileIO::SDFileIO::GenerateMoleculeString($Molecule); | |
298 | |
299 Returns a B<MoleculeString> in SD format corresponding to I<Molecule>. | |
300 | |
301 =item B<IsSDFile> | |
302 | |
303 $Status = $SDFileIO->IsSDFile($FileName); | |
304 $Status = FileIO::SDFileIO::IsSDFile($FileName); | |
305 | |
306 Returns 1 or 0 based on whether I<FileName> is a SD file. | |
307 | |
308 =item B<ParseMoleculeString> | |
309 | |
310 $Molecule = $SDFileIO->ParseMoleculeString($MoleculeString); | |
311 $Molecule = FileIO::SDFileIO::ParseMoleculeString($MoleculeString); | |
312 | |
313 Parses I<MoleculeString> and returns a B<Molecule> object. SD data field label and value pairs | |
314 are associated to B<Molecule> object as a reference to a hash using: | |
315 | |
316 $Molecule->SetDataFieldLabelAndValues(\%DataLabelsAndValues) | |
317 | |
318 The reference to hash can be retrieved by: | |
319 | |
320 $DataLabelsAndValues = $Molecule->GetDataFieldLabelAndValues(); | |
321 for $DataLabel (sort keys %{$DataLabelsAndValues}) { | |
322 $DataValue = $DataLabelsAndValues->{$DataLabel}; | |
323 } | |
324 | |
325 =item B<ReadMolecule> | |
326 | |
327 $Molecule = $SDFileIO->ReadMolecule(); | |
328 | |
329 Reads data for the next compound using the file handle associated with the B<SDFileIO> | |
330 object, creates, and returns a B<Molecule> object. | |
331 | |
332 =item B<ReadMoleculeString> | |
333 | |
334 $MoleculeString = $SDFileIO->ReadMoleculeString(); | |
335 | |
336 Reads data for the next compound using the file handle associated with the B<SDFileIO> | |
337 object and returns a B<MoleculeString> corresponding to compound structure and other | |
338 associated data. | |
339 | |
340 =item B<WriteMolecule> | |
341 | |
342 $SDFileIO->WriteMolecule($Molecule); | |
343 | |
344 Writes I<Molecule> data to a file in SD format and returns B<SDFileIO>. | |
345 | |
346 =back | |
347 | |
348 =head1 AUTHOR | |
349 | |
350 Manish Sud <msud@san.rr.com> | |
351 | |
352 =head1 SEE ALSO | |
353 | |
354 MoleculeFileIO.pm, MDLMolFileIO.pm | |
355 | |
356 =head1 COPYRIGHT | |
357 | |
358 Copyright (C) 2015 Manish Sud. All rights reserved. | |
359 | |
360 This file is part of MayaChemTools. | |
361 | |
362 MayaChemTools is free software; you can redistribute it and/or modify it under | |
363 the terms of the GNU Lesser General Public License as published by the Free | |
364 Software Foundation; either version 3 of the License, or (at your option) | |
365 any later version. | |
366 | |
367 =cut |