package FileIO::SDFileIO;
#
# $RCSfile: SDFileIO.pm,v $
# $Date: 2015/02/28 20:48:43 $
# $Revision: 1.35 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2015 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use Carp;
use Exporter;
use Scalar::Util ();
use TextUtil ();
use FileUtil ();
use SDFileUtil ();
use FileIO::FileIO;
use FileIO::MDLMolFileIO;
use Molecule;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(FileIO::FileIO Exporter);
@EXPORT = qw();
@EXPORT_OK = qw(IsSDFile);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Class constructor...
#
# Takes a list of property names and values, which are handed over to
# _InitializeSDFileIOProperties, and returns the new object.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Build the object using the parent class constructor and rebless it into the class
  # of the invocant, which might be a class name or an existing object...
  my($This);
  $This = bless $Class->SUPER::new(), (ref($Class) || $Class);

  # Set up local object data before applying any specified names and values...
  $This->_InitializeSDFileIO();
  $This->_InitializeSDFileIOProperties(%NamesAndValues);

  return $This;
}

# Initialize any local object data...
#
# Sets the SD specific defaults on the object and returns the object.
#
sub _InitializeSDFileIO {
  my($This) = @_;

  # Sorting of MDL data fields during output: Keep the initial order or write 'em out alphabetically...
  $This->{SortDataFieldsDuringOutput} = 'No';

  return $This;
}

# Initialize class ...
sub _InitializeClass {
  #Class name...
  $ClassName = __PACKAGE__;

}

# Initialize object values...
#
# Validates the mandatory Name property and then applies each specified name and value
# pair through its Set<PropertyName> method.
#
# Croaks when Name is missing or undefined, or when Name doesn't pass the IsSDFile check.
# Returns the object itself.
#
sub _InitializeSDFileIOProperties {
  my($This, %NamesAndValues) = @_;

  # Validate the file name up front: a bad call fails before any setter has been invoked,
  # and an undefined name never reaches the file type check...
  if (!(exists $NamesAndValues{Name} && defined $NamesAndValues{Name})) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name...";
  }

  # Make sure it's a SD file...
  my($FileName);
  $FileName = $NamesAndValues{Name};
  if (!$This->IsSDFile($FileName)) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $FileName, doesn't appear to be SDF format...";
  }

  # All other property names and values along with all Set/Get<PropertyName> methods
  # are implemented on-demand using ObjectProperty class.
  #
  # Names are processed in sorted order to make the sequence of setter calls reproducible
  # instead of following the hash order...
  my($Name, $MethodName);
  for $Name (sort keys %NamesAndValues) {
    $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  return $This;
}

# Is it a SD file?
#
# Supports two invocation methods: object method, $Object->IsSDFile($FileName), or package
# function, IsSDFile($FileName). The prototype is kept for callers importing the function;
# it plays no role during method calls.
#
# Returns the status of FileUtil::CheckFileType for the "sd sdf" file extensions.
#
sub IsSDFile ($;$) {
  my($FirstParameter, $SecondParameter) = @_;
  my($FileName, $Status);

  # Skip over the invocant during object method calls...
  if ((@_ == 2) && (_IsSDFileIO($FirstParameter))) {
    $FileName = $SecondParameter;
  }
  else {
    $FileName = $FirstParameter;
  }

  # Check file extension...
  $Status = FileUtil::CheckFileType($FileName, "sd sdf");

  return $Status;
}

# Read molecule from file and return molecule object...
#
# Returns whatever ParseMoleculeString returns for the compound string retrieved from
# the file handle of the object.
#
sub ReadMolecule {
  my($This) = @_;
  my($FileHandle, $CmpdString);

  $FileHandle = $This->GetFileHandle();

  # Retrieve the compound string into a scalar: ParseMoleculeString relies on its argument
  # count to detect method calls, so it must always receive exactly two arguments even
  # when nothing is read...
  $CmpdString = SDFileUtil::ReadCmpdString($FileHandle);

  return $This->ParseMoleculeString($CmpdString);
}

# Write compound data along with any data field label and values using Molecule object...
#
# Nothing is written and a warning is issued unless a Molecule object is specified.
# A warning is also issued when the print to the file handle fails.
# Returns the object itself.
#
sub WriteMolecule {
  my($This, $Molecule) = @_;

  # Check for a blessed reference first: a plain scalar or an unblessed reference then
  # leads to the warning below instead of a fatal error on the IsMolecule method call...
  if (!(Scalar::Util::blessed($Molecule) && $Molecule->IsMolecule())) {
    carp "Warning: ${ClassName}->WriteMolecule: No data written: Molecule object is not specified...";
    return $This;
  }
  my($FileHandle);
  $FileHandle = $This->GetFileHandle();

  # Report a failed write instead of silently dropping the molecule data...
  print {$FileHandle} $This->GenerateMoleculeString($Molecule) . "\n"
    or carp "Warning: ${ClassName}->WriteMolecule: Couldn't write molecule data: $!";

  return $This;
}

# Retrieve molecule string...
#
# Returns the result of SDFileUtil::ReadCmpdString for the file handle of the object.
#
sub ReadMoleculeString {
  my($This) = @_;
  my($FileHandle);

  $FileHandle = $This->GetFileHandle();
  return SDFileUtil::ReadCmpdString($FileHandle);
}

# Parse molecule string and return molecule object. ParseMoleculeString supports two invocation methods: class
# method or a package function.
#
# Returns undef for an empty molecule string or when no molecule object is available after
# parsing the CTAB data. The explicit undef return value is kept for existing callers.
#
sub ParseMoleculeString {
  my($FirstParameter, $SecondParameter) = @_;
  my($MoleculeString);

  # Skip over the invocant during method calls...
  if ((@_ == 2) && (_IsSDFileIO($FirstParameter))) {
    $MoleculeString = $SecondParameter;
  }
  else {
    $MoleculeString = $FirstParameter;
  }
  if (!$MoleculeString) {
    return undef;
  }

  # Parse molecule data...
  my($Molecule);
  $Molecule = FileIO::MDLMolFileIO::ParseMoleculeString($MoleculeString);

  # Don't call any methods on a missing molecule object...
  if (!defined($Molecule)) {
    return undef;
  }

  # Process data label/value pairs...
  my(@MoleculeLines, @DataLabels, %DataLabelsAndValues);

  @MoleculeLines = split /\n/, $MoleculeString;
  @DataLabels = SDFileUtil::GetCmpdDataHeaderLabels(\@MoleculeLines);
  %DataLabelsAndValues = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@MoleculeLines);

  # Store reference to data labels to keep track of their initial order in SD file...
  $Molecule->SetDataFieldLabels(\@DataLabels);

  # Store reference to SD data label/value pairs hash as a generic property of molecule...
  $Molecule->SetDataFieldLabelAndValues(\%DataLabelsAndValues);

  return $Molecule;
}

# Generate molecule string using molecule object...
#
# Supports the same two invocation methods as ParseMoleculeString. The data fields are sorted
# only when invoked on an object with SortDataFieldsDuringOutput set to Yes.
#
# Returns undef for an undefined molecule; otherwise, a string containing the CTAB data, any
# data field labels and values lines, and the $$$$ delimiter line.
#
sub GenerateMoleculeString {
  my($FirstParameter, $SecondParameter) = @_;
  my($This, $Molecule);

  if ((@_ == 2) && (_IsSDFileIO($FirstParameter))) {
    ($This, $Molecule) = ($FirstParameter, $SecondParameter);
  }
  else {
    $Molecule = $FirstParameter;
    $This = undef;
  }
  if (!defined($Molecule)) {
    return undef;
  }

  # Generate CTAB data...
  my($CmpdString);
  $CmpdString = FileIO::MDLMolFileIO::GenerateMoleculeString($Molecule);

  # Generate any data field labels and values...
  my($DataFieldLabelsAndValuesString);

  $DataFieldLabelsAndValuesString = '';
  if ($Molecule->HasProperty('DataFieldLabels')) {
    my($DataFieldLabelsRef, $DataFieldLabelAndValuesRef, $SortDataFields);

    # $This is undefined during package function calls: check it before looking up the
    # sorting option to avoid dereferencing an undefined value...
    $SortDataFields = 0;
    if (defined($This) && defined($This->{SortDataFieldsDuringOutput}) && $This->{SortDataFieldsDuringOutput} =~ /^Yes$/i) {
      $SortDataFields = 1;
    }

    $DataFieldLabelsRef = $Molecule->GetDataFieldLabels();
    $DataFieldLabelAndValuesRef = $Molecule->GetDataFieldLabelAndValues();
    $DataFieldLabelsAndValuesString = join "\n", SDFileUtil::GenerateCmpdDataHeaderLabelsAndValuesLines($DataFieldLabelsRef, $DataFieldLabelAndValuesRef, $SortDataFields);
  }

  return "${CmpdString}\n${DataFieldLabelsAndValuesString}\n\$\$\$\$";
}


# Is it a SDFileIO object?
#
# Returns 1 for a blessed reference which is an instance of this class or a class derived
# from it; otherwise, returns 0.
#
sub _IsSDFileIO {
  my($Object) = @_;

  return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0;
}