MayaChemTools

   1 package FileIO::SDFileIO;
   2 #
   3 # $RCSfile: SDFileIO.pm,v $
   4 # $Date: 2015/02/28 20:48:43 $
   5 # $Revision: 1.35 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability or fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use Carp;
  31 use Exporter;
  32 use Scalar::Util ();
  33 use TextUtil ();
  34 use FileUtil ();
  35 use SDFileUtil ();
  36 use FileIO::FileIO;
  37 use FileIO::MDLMolFileIO;
  38 use Molecule;
  39 
  # Package globals used for inheritance and by Exporter...
  our (@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

  # Inherit from the generic FileIO class; Exporter provides import()...
  @ISA = ('FileIO::FileIO', 'Exporter');

  # Nothing is exported by default; IsSDFile is exported on request only...
  @EXPORT = ();
  @EXPORT_OK = ('IsSDFile');

  # The "all" tag covers every exportable name...
  %EXPORT_TAGS = ('all' => [@EXPORT, @EXPORT_OK]);

  # File-scoped class name used in warning and error messages; it's assigned
  # by _InitializeClass...
  my $ClassName;
  _InitializeClass();
  51 
  # Class constructor: the parent class creates the object, which is then blessed
  # into the requested class, given its local defaults and finally configured
  # using the specified property names and values...
  #
  sub new {
    my $Class = shift;
    my %NamesAndValues = @_;

    # Works for both Class->new(...) and $Object->new(...) invocations...
    my $This = $Class->SUPER::new();
    my $TargetClass = ref($Class) || $Class;
    bless $This, $TargetClass;

    # Local defaults first; caller specified properties afterwards...
    $This->_InitializeSDFileIO();
    $This->_InitializeSDFileIOProperties(%NamesAndValues);

    return $This;
  }
  65 
  # Set up default values for object data local to this class and return the object...
  #
  sub _InitializeSDFileIO {
    my $This = shift;

    # By default, data fields are written out in their initial order; the value
    # 'Yes' requests sorted output instead...
    $This->{SortDataFieldsDuringOutput} = 'No';

    return $This;
  }
  76 
  # Initialize class data: record the name of this package in the file-scoped
  # $ClassName variable. The assigned name is also the return value...
  #
  sub _InitializeClass {
    return $ClassName = __PACKAGE__;
  }
  83 
  # Apply property names and values specified during object instantiation and make
  # sure a file name with a SD file extension has been specified. Croaks otherwise;
  # returns the object on success...
  #
  sub _InitializeSDFileIOProperties {
    my($This, %NamesAndValues) = @_;

    # Each property is set by calling its Set<PropertyName> method; those methods
    # are not defined in this file...
    for my $PropertyName (keys %NamesAndValues) {
      my $PropertyValue = $NamesAndValues{$PropertyName};
      my $SetterName = "Set${PropertyName}";
      $This->$SetterName($PropertyValue);
    }

    # A file name is mandatory...
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name..."
      unless exists $NamesAndValues{Name};

    # ...and it must be recognized as a SD file...
    my $FileName = $NamesAndValues{Name};
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $FileName, doesn't appear to be SDF format..."
      unless $This->IsSDFile($FileName);

    return $This;
  }
 109 
 # Is it a SD file? Supports two invocation methods: object method,
 # $SDFileIO->IsSDFile($FileName), or package function, IsSDFile($FileName).
 # The answer is whatever FileUtil::CheckFileType reports for the extension
 # list "sd sdf"...
 #
 sub IsSDFile ($;$) {
   my @Arguments = @_;

   # For an object method call, the file name is the second argument...
   my $FileName = ((@Arguments == 2) && _IsSDFileIO($Arguments[0])) ? $Arguments[1] : $Arguments[0];

   # Check file extension...
   my $Status = FileUtil::CheckFileType($FileName, "sd sdf");

   return $Status;
 }
 127 
 # Read next compound string from the file and return the result of passing it
 # to ParseMoleculeString...
 #
 sub ReadMolecule {
   my($This) = @_;

   my $FileHandle = $This->GetFileHandle();

   # Capture the compound string in a scalar before passing it on. Calling
   # ReadCmpdString directly in the argument list evaluates it in list context;
   # should it ever return an empty list, ParseMoleculeString would receive
   # only the object and treat it as the molecule string...
   my $MoleculeString = SDFileUtil::ReadCmpdString($FileHandle);

   return $This->ParseMoleculeString($MoleculeString);
 }
 136 
 # Write compound data along with any data field labels and values using Molecule
 # object. The generated molecule string followed by a new line is written to the
 # file handle of this object.
 #
 # A warning is issued and nothing is written when Molecule is missing, is not an
 # object, or doesn't identify itself as a molecule. A warning is also issued when
 # the write fails. The object itself is returned in all cases...
 #
 sub WriteMolecule {
   my($This, $Molecule) = @_;

   # Use blessed() instead of defined(): calling IsMolecule on a plain string or
   # an unblessed reference would die instead of issuing the warning...
   if (!(Scalar::Util::blessed($Molecule) && $Molecule->IsMolecule())) {
     carp "Warning: ${ClassName}->WriteMolecule: No data written: Molecule object is not specified...";
     return $This;
   }

   my $FileHandle = $This->GetFileHandle();
   my $MoleculeString = $This->GenerateMoleculeString($Molecule);

   # Block form of the file handle avoids any ambiguity in parsing print arguments;
   # a failed write is reported instead of being silently ignored...
   print {$FileHandle} $MoleculeString . "\n"
     or carp "Warning: ${ClassName}->WriteMolecule: Couldn't write molecule data: $!";

   return $This;
 }
 152 
 # Retrieve next compound string from the file without parsing it; the value
 # returned by SDFileUtil::ReadCmpdString is passed back as is...
 #
 sub ReadMoleculeString {
   my $This = shift;

   my $SDFileHandle = $This->GetFileHandle();

   return SDFileUtil::ReadCmpdString($SDFileHandle);
 }
 161 
 # Parse molecule string and return molecule object. ParseMoleculeString supports two
 # invocation methods: object method, $SDFileIO->ParseMoleculeString($MoleculeString),
 # or package function, ParseMoleculeString($MoleculeString).
 #
 # The structure data is parsed by FileIO::MDLMolFileIO::ParseMoleculeString; SD data
 # field labels and values are attached to the resulting molecule object.
 #
 # Returns undef for an empty molecule string, or when no molecule object is
 # generated from the structure data...
 #
 sub ParseMoleculeString {
   my($FirstParameter, $SecondParameter) = @_;

   # For an object method call, the molecule string is the second argument...
   my $MoleculeString = ((@_ == 2) && (_IsSDFileIO($FirstParameter))) ? $SecondParameter : $FirstParameter;

   # Explicit undef is retained as the return value for existing callers...
   if (!$MoleculeString) {
     return undef;
   }

   # Parse molecule data...
   my $Molecule = FileIO::MDLMolFileIO::ParseMoleculeString($MoleculeString);

   # Don't call methods on a missing molecule object: that would be a fatal
   # error instead of a failed parse...
   if (!defined($Molecule)) {
     return undef;
   }

   # Process data label/value pairs...
   my @MoleculeLines = split /\n/, $MoleculeString;
   my @DataLabels = SDFileUtil::GetCmpdDataHeaderLabels(\@MoleculeLines);
   my %DataLabelsAndValues = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@MoleculeLines);

   # Store reference to data labels to keep track of their initial order in SD file...
   $Molecule->SetDataFieldLabels(\@DataLabels);

   # Store reference to SD data label/value pairs hash as a generic property of molecule...
   $Molecule->SetDataFieldLabelAndValues(\%DataLabelsAndValues);

   return $Molecule;
 }
 199 
 # Generate molecule string using molecule object. GenerateMoleculeString supports two
 # invocation methods: object method, $SDFileIO->GenerateMoleculeString($Molecule), or
 # package function, GenerateMoleculeString($Molecule).
 #
 # The string consists of the structure data generated by
 # FileIO::MDLMolFileIO::GenerateMoleculeString, any data field labels and values
 # lines, and the $$$$ delimiter line. Data fields are sorted only during an object
 # method call on an object whose SortDataFieldsDuringOutput value is Yes.
 #
 # Returns undef when Molecule is not defined...
 #
 sub GenerateMoleculeString {
   my($FirstParameter, $SecondParameter) = @_;
   my($This, $Molecule);

   if ((@_ == 2) && (_IsSDFileIO($FirstParameter))) {
     ($This, $Molecule) = ($FirstParameter, $SecondParameter);
   }
   else {
     $Molecule = $FirstParameter;
     $This = undef;
   }

   # Explicit undef is retained as the return value for existing callers...
   if (!defined($Molecule)) {
     return undef;
   }

   # Generate CTAB data...
   my $CmpdString = FileIO::MDLMolFileIO::GenerateMoleculeString($Molecule);

   # Generate any data field labels and values...
   my $DataFieldLabelsAndValuesString = '';
   if ($Molecule->HasProperty('DataFieldLabels')) {
     # There is no object during a package function call; check for it explicitly
     # instead of dereferencing an undefined value...
     my $SortDataFields = (defined($This) && exists($This->{SortDataFieldsDuringOutput}) && $This->{SortDataFieldsDuringOutput} =~ /^Yes$/i) ? 1 : 0;

     my $DataFieldLabelsRef = $Molecule->GetDataFieldLabels();
     my $DataFieldLabelAndValuesRef = $Molecule->GetDataFieldLabelAndValues();
     $DataFieldLabelsAndValuesString = join "\n", SDFileUtil::GenerateCmpdDataHeaderLabelsAndValuesLines($DataFieldLabelsRef, $DataFieldLabelAndValuesRef, $SortDataFields);
   }

   return "${CmpdString}\n${DataFieldLabelsAndValuesString}\n\$\$\$\$";
 }
 235 
 236 
 # Is it a SDFileIO object? Returns 1 for a blessed reference which is an instance
 # of this class or one of its subclasses, and 0 for everything else...
 #
 sub _IsSDFileIO {
   my($Candidate) = @_;

   # Only blessed references support isa method call...
   return 0 unless Scalar::Util::blessed($Candidate);

   return $Candidate->isa($ClassName) ? 1 : 0;
 }
 243