Mercurial > repos > deepakjadmin > mayatool3_test2
comparison lib/FileIO/SDFileIO.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
| author | deepakjadmin |
|---|---|
| date | Wed, 20 Jan 2016 09:23:18 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4816e4a8ae95 |
|---|---|
| 1 package FileIO::SDFileIO; | |
| 2 # | |
| 3 # $RCSfile: SDFileIO.pm,v $ | |
| 4 # $Date: 2015/02/28 20:48:43 $ | |
| 5 # $Revision: 1.35 $ | |
| 6 # | |
| 7 # Author: Manish Sud <msud@san.rr.com> | |
| 8 # | |
| 9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 10 # | |
| 11 # This file is part of MayaChemTools. | |
| 12 # | |
| 13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 14 # the terms of the GNU Lesser General Public License as published by the Free | |
| 15 # Software Foundation; either version 3 of the License, or (at your option) any | |
| 16 # later version. | |
| 17 # | |
| 18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
| 19 # any warranty; without even the implied warranty of merchantability of fitness | |
| 20 # for a particular purpose. See the GNU Lesser General Public License for more | |
| 21 # details. | |
| 22 # | |
| 23 # You should have received a copy of the GNU Lesser General Public License | |
| 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
| 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
| 26 # Boston, MA, 02111-1307, USA. | |
| 27 # | |
| 28 | |
| 29 use strict; | |
| 30 use Carp; | |
| 31 use Exporter; | |
| 32 use Scalar::Util (); | |
| 33 use TextUtil (); | |
| 34 use FileUtil (); | |
| 35 use SDFileUtil (); | |
| 36 use FileIO::FileIO; | |
| 37 use FileIO::MDLMolFileIO; | |
| 38 use Molecule; | |
| 39 | |
# Package globals consumed by the inheritance (@ISA) and Exporter machinery.
# Declared with "our" instead of the obsolete "use vars" pragma; the variables
# are the same package variables either way.
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

# Inherit generic file handling from FileIO::FileIO and import support from Exporter...
@ISA = qw(FileIO::FileIO Exporter);

# Nothing is exported by default; IsSDFile is available on request...
@EXPORT = qw();
@EXPORT_OK = qw(IsSDFile);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my($ClassName);
_InitializeClass();
| 51 | |
# Class constructor...
#
# Creates the object through the parent class constructor, blesses it into the
# requested class, sets up SDFileIO specific defaults and then applies the
# specified property names and values.
#
# Returns the newly created object.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Support invocation on a class name as well as on an existing object...
  my $TargetClass = ref($Class) || $Class;
  my $This = bless($Class->SUPER::new(), $TargetClass);

  # Object defaults first, followed by caller specified values...
  $This->_InitializeSDFileIO();
  $This->_InitializeSDFileIOProperties(%NamesAndValues);

  return $This;
}
| 65 | |
# Initialize any local object data...
#
# Sets the default for sorting of MDL data fields during output: 'No' keeps the
# initial order of the data fields; the alternative is to write 'em out
# alphabetically.
#
# Returns the object.
#
sub _InitializeSDFileIO {
  my $This = shift;

  $This->{SortDataFieldsDuringOutput} = 'No';

  return $This;
}
| 76 | |
# Initialize class ...
#
# Caches the name of this package in the file level $ClassName variable, which
# is used in error messages and in the object type check.
#
sub _InitializeClass {
  return $ClassName = __PACKAGE__;
}
| 83 | |
# Initialize object values...
#
# Applies each specified name and value pair through the corresponding
# Set<Name> method and then verifies that a file name was specified and that it
# passes the IsSDFile check. Croaks when either check fails.
#
# Returns the object.
#
sub _InitializeSDFileIOProperties {
  my($This, %NamesAndValues) = @_;

  # All other property names and values along with all Set/Get<PropertyName> methods
  # are implemented on-demand using ObjectProperty class.
  for my $PropertyName (keys %NamesAndValues) {
    my $SetterName = "Set${PropertyName}";
    $This->$SetterName($NamesAndValues{$PropertyName});
  }

  # A file name is mandatory...
  croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name..."
    unless exists $NamesAndValues{Name};

  # Make sure it's a SD file...
  my $Name = $NamesAndValues{Name};
  croak "Error: ${ClassName}->New: Object can't be instantiated: File, $Name, doesn't appear to be SDF format..."
    unless $This->IsSDFile($Name);

  return $This;
}
| 109 | |
# Is it a SD file?
#
# Supports two invocation methods: as an object method, $SDFileIO->IsSDFile($FileName),
# or as a package function, IsSDFile($FileName).
#
# Returns the status reported by FileUtil::CheckFileType for the "sd sdf" file
# types.
#
sub IsSDFile ($;$) {
  my($FirstParameter, $SecondParameter) = @_;

  # For an object method call the file name is the second parameter...
  my $FileName = ((@_ == 2) && _IsSDFileIO($FirstParameter)) ? $SecondParameter : $FirstParameter;

  # Check file extension...
  my $Status = FileUtil::CheckFileType($FileName, "sd sdf");

  return $Status;
}
| 127 | |
# Read molecule from file and return molecule object...
#
# Retrieves the next compound string from the file handle associated with the
# object and hands it over to ParseMoleculeString.
#
sub ReadMolecule {
  my $This = shift;

  my $FileHandle = $This->GetFileHandle();

  return $This->ParseMoleculeString(SDFileUtil::ReadCmpdString($FileHandle));
}
| 136 | |
# Write compound data along with any data field label and values using Molecule object...
#
# Generates the molecule string for the specified Molecule object and prints it,
# followed by a new line, to the file handle associated with the object.
#
# No data is written and a warning is issued when the specified value is not an
# object or its IsMolecule method returns false.
#
# Returns the object.
#
sub WriteMolecule {
  my($This, $Molecule) = @_;

  # blessed() guards the IsMolecule call: invoking a method on an undefined value,
  # a plain string or an unblessed reference would die instead of warning...
  if (!(Scalar::Util::blessed($Molecule) && $Molecule->IsMolecule())) {
    carp "Warning: ${ClassName}->WriteMolecule: No data written: Molecule object is not specified...";
    return $This;
  }

  my $FileHandle = $This->GetFileHandle();

  print $FileHandle $This->GenerateMoleculeString($Molecule) . "\n";

  return $This;
}
| 152 | |
# Retrieve molecule string...
#
# Returns whatever SDFileUtil::ReadCmpdString retrieves from the file handle
# associated with the object.
#
sub ReadMoleculeString {
  my $This = shift;

  my $FileHandle = $This->GetFileHandle();

  return SDFileUtil::ReadCmpdString($FileHandle);
}
| 161 | |
# Parse molecule string and return molecule object. ParseMoleculeString supports two invocation methods: object
# method or a package function.
#
# The structure data is parsed by FileIO::MDLMolFileIO::ParseMoleculeString; data field labels and
# label/value pairs are retrieved using SDFileUtil and attached to the molecule.
#
# Returns undef for an empty molecule string or when no molecule is returned for the structure data.
#
sub ParseMoleculeString {
  my($FirstParameter, $SecondParameter) = @_;
  my($MoleculeString);

  # For an object method call the molecule string is the second parameter...
  if ((@_ == 2) && (_IsSDFileIO($FirstParameter))) {
    $MoleculeString = $SecondParameter;
  }
  else {
    $MoleculeString = $FirstParameter;
  }
  if (!$MoleculeString) {
    return undef;
  }

  # Parse molecule data...
  my($Molecule);
  $Molecule = FileIO::MDLMolFileIO::ParseMoleculeString($MoleculeString);

  # Without a molecule there is nothing to attach data fields to; calling the
  # Set methods below on an undefined value would die...
  if (!defined($Molecule)) {
    return undef;
  }

  # Process data label/value pairs...
  my(@MoleculeLines, @DataLabels, %DataLabelsAndValues);

  @MoleculeLines = split /\n/, $MoleculeString;
  @DataLabels = SDFileUtil::GetCmpdDataHeaderLabels(\@MoleculeLines);
  %DataLabelsAndValues = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@MoleculeLines);

  # Store reference to data labels to keep track of their initial order in SD file...
  $Molecule->SetDataFieldLabels(\@DataLabels);

  # Store reference to SD data label/value pairs hash as a generic property of molecule...
  $Molecule->SetDataFieldLabelAndValues(\%DataLabelsAndValues);

  return $Molecule;
}
| 199 | |
# Generate molecule string using molecule object...
#
# Supports two invocation methods: as an object method or as a package function.
# The structure data generated by FileIO::MDLMolFileIO::GenerateMoleculeString is
# followed by any data field labels and values lines and the $$$$ delimiter.
#
# Returns undef when no molecule is specified.
#
sub GenerateMoleculeString {
  my($FirstParameter, $SecondParameter) = @_;

  # For an object method call the molecule is the second parameter; there is no
  # object available during a package function call...
  my($This, $Molecule) = ((@_ == 2) && _IsSDFileIO($FirstParameter))
    ? ($FirstParameter, $SecondParameter)
    : (undef, $FirstParameter);

  return undef unless defined($Molecule);

  # Generate CTAB data...
  my $CmpdString = FileIO::MDLMolFileIO::GenerateMoleculeString($Molecule);

  # Generate any data field labels and values...
  my $DataFieldLabelsAndValuesString = '';
  if ($Molecule->HasProperty('DataFieldLabels')) {
    # Sorting is turned on only by a Yes value, in any case, set on the object...
    my $SortDataFields = 0;
    if (defined($This) && exists($This->{SortDataFieldsDuringOutput}) && $This->{SortDataFieldsDuringOutput} =~ /^Yes$/i) {
      $SortDataFields = 1;
    }

    my $DataFieldLabelsRef = $Molecule->GetDataFieldLabels();
    my $DataFieldLabelAndValuesRef = $Molecule->GetDataFieldLabelAndValues();

    my @DataFieldLines = SDFileUtil::GenerateCmpdDataHeaderLabelsAndValuesLines($DataFieldLabelsRef, $DataFieldLabelAndValuesRef, $SortDataFields);
    $DataFieldLabelsAndValuesString = join "\n", @DataFieldLines;
  }

  return "${CmpdString}\n${DataFieldLabelsAndValuesString}\n\$\$\$\$";
}
| 235 | |
| 236 | |
# Is it a SDFileIO object?
#
# Returns 1 for a blessed object derived from this class and 0 for anything
# else, including undefined values, plain strings and unblessed references.
#
sub _IsSDFileIO {
  my $Object = shift;

  return 0 unless Scalar::Util::blessed($Object);

  return $Object->isa($ClassName) ? 1 : 0;
}
| 243 | |
| 244 1; | |
| 245 | |
| 246 __END__ | |
| 247 | |
| 248 =head1 NAME | |
| 249 | |
| 250 SDFileIO | |
| 251 | |
| 252 =head1 SYNOPSIS | |
| 253 | |
| 254 use FileIO::SDFileIO; | |
| 255 | |
| 256 use FileIO::SDFileIO qw(:all); | |
| 257 | |
| 258 =head1 DESCRIPTION | |
| 259 | |
| 260 B<SDFileIO> class provides the following methods: | |
| 261 | |
| 262 new, GenerateMoleculeString, IsSDFile, ParseMoleculeString, ReadMolecule, | |
| 263 ReadMoleculeString, WriteMolecule | |
| 264 | |
| 265 The following methods can also be used as functions: | |
| 266 | |
| 267 GenerateMoleculeString, IsSDFile, ParseMoleculeString | |
| 268 | |
| 269 Data specific to B<SDFileIO> class not directly used by B<Molecule>, B<Atom> and | |
| 270 B<Bond> objects - data label/value pairs, atom StereoParity and so on - is associated to | |
| 271 and retrieved from appropriate objects using the following methods: | |
| 272 | |
| 273 SetMDL<PropertyName> | |
| 274 GetMDL<PropertyName>. | |
| 275 | |
| 276 SD data label and values are attached to B<Molecule> object as a reference to a hash | |
| 277 using SetDataFieldLabelAndValues and can be retrieved using GetDataFieldLabelAndValues | |
| 278 method. | |
| 279 | |
| 280 B<SDFileIO> class is derived from I<FileIO> class and uses its methods to support | |
| 281 generic file related functionality. | |
| 282 | |
| 283 =head2 METHODS | |
| 284 | |
| 285 =over 4 | |
| 286 | |
| 287 =item B<new> | |
| 288 | |
| 289 $NewSDFileIO = new FileIO::SDFileIO(%NamesAndValues); | |
| 290 | |
| 291 Using specified I<SDFileIO> property names and values hash, B<new> method creates a new object | |
| 292 and returns a reference to newly created B<SDFileIO> object. | |
| 293 | |
| 294 =item B<GenerateMoleculeString> | |
| 295 | |
| 296 $MoleculeString = $SDFileIO->GenerateMoleculeString($Molecule); | |
| 297 $MoleculeString = FileIO::SDFileIO::GenerateMoleculeString($Molecule); | |
| 298 | |
| 299 Returns a B<MoleculeString> in SD format corresponding to I<Molecule>. | |
| 300 | |
| 301 =item B<IsSDFile> | |
| 302 | |
| 303 $Status = $SDFileIO->IsSDFile($FileName); | |
| 304 $Status = FileIO::SDFileIO::IsSDFile($FileName); | |
| 305 | |
| 306 Returns 1 or 0 based on whether I<FileName> is a SD file. | |
| 307 | |
| 308 =item B<ParseMoleculeString> | |
| 309 | |
| 310 $Molecule = $SDFileIO->ParseMoleculeString($MoleculeString); | |
| 311 $Molecule = FileIO::SDFileIO::ParseMoleculeString($MoleculeString); | |
| 312 | |
| 313 Parses I<MoleculeString> and returns a B<Molecule> object. SD data field label and value pairs | |
| 314 are associated to B<Molecule> object as a reference to a hash using: | |
| 315 | |
| 316 $Molecule->SetDataFieldLabelAndValues(\%DataLabelsAndValues) | |
| 317 | |
| 318 The reference to hash can be retrieved by: | |
| 319 | |
| 320 $DataLabelsAndValues = $Molecule->GetDataFieldLabelAndValues(); | |
| 321 for $DataLabel (sort keys %{$DataLabelsAndValues}) { | |
| 322 $DataValue = $DataLabelsAndValues->{$DataLabel}; | |
| 323 } | |
| 324 | |
| 325 =item B<ReadMolecule> | |
| 326 | |
| 327 $Molecule = $SDFileIO->ReadMolecule(); | |
| 328 | |
| 329 Reads data for the next compound using the file handle associated with B<SDFileIO> object, | |
| 330 creates, and returns a B<Molecule> object. | |
| 331 | |
| 332 =item B<ReadMoleculeString> | |
| 333 | |
| 334 $MoleculeString = $SDFileIO->ReadMoleculeString(); | |
| 335 | |
| 336 Reads data for the next compound using the file handle associated with B<SDFileIO> object | |
| 337 and returns a B<MoleculeString> corresponding to compound structure and other associated | |
| 338 data. | |
| 339 | |
| 340 =item B<WriteMolecule> | |
| 341 | |
| 342 $SDFileIO->WriteMolecule($Molecule); | |
| 343 | |
| 344 Writes I<Molecule> data to a file in SD format and returns B<SDFileIO>. | |
| 345 | |
| 346 =back | |
| 347 | |
| 348 =head1 AUTHOR | |
| 349 | |
| 350 Manish Sud <msud@san.rr.com> | |
| 351 | |
| 352 =head1 SEE ALSO | |
| 353 | |
| 354 MoleculeFileIO.pm, MDLMolFileIO.pm | |
| 355 | |
| 356 =head1 COPYRIGHT | |
| 357 | |
| 358 Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 359 | |
| 360 This file is part of MayaChemTools. | |
| 361 | |
| 362 MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 363 the terms of the GNU Lesser General Public License as published by the Free | |
| 364 Software Foundation; either version 3 of the License, or (at your option) | |
| 365 any later version. | |
| 366 | |
| 367 =cut |
