Mercurial > repos > deepakjadmin > mayatool3_test2
diff lib/FileIO/SDFileIO.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
line wrap: on
line diff
package FileIO::SDFileIO;
#
# $RCSfile: SDFileIO.pm,v $
# $Date: 2015/02/28 20:48:43 $
# $Revision: 1.35 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2015 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use warnings;
use Carp;
use Exporter;
use Scalar::Util ();
use TextUtil ();
use FileUtil ();
use SDFileUtil ();
use FileIO::FileIO;
use FileIO::MDLMolFileIO;
use Molecule;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(FileIO::FileIO Exporter);
@EXPORT = qw();
@EXPORT_OK = qw(IsSDFile);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Class constructor...
#
# Takes a hash of property names and values. Construction croaks (see
# _InitializeSDFileIOProperties) unless a 'Name' property is supplied and it
# passes the IsSDFile check. Returns the new object.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Initialize object...
  my $This = $Class->SUPER::new();
  bless $This, ref($Class) || $Class;
  $This->_InitializeSDFileIO();

  $This->_InitializeSDFileIOProperties(%NamesAndValues);

  return $This;
}

# Initialize any local object data...
#
sub _InitializeSDFileIO {
  my($This) = @_;

  # Sorting of MDL data fields during output: Keep the initial order or write 'em out alphabetically...
  $This->{SortDataFieldsDuringOutput} = 'No';

  return $This;
}

# Initialize class ...
sub _InitializeClass {
  #Class name...
  $ClassName = __PACKAGE__;

}

# Initialize object values...
#
# Calls Set<Name>(Value) on the object for every name/value pair and then
# validates the 'Name' property. Croaks when 'Name' is missing or when
# IsSDFile rejects it. Returns the object.
#
sub _InitializeSDFileIOProperties {
  my($This, %NamesAndValues) = @_;

  # All other property names and values along with all Set/Get<PropertyName> methods
  # are implemented on-demand using ObjectProperty class.

  for my $Name (keys %NamesAndValues) {
    my $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  if (!exists $NamesAndValues{Name}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying file name...";
  }

  # Make sure it's a SD file...
  my $FileName = $NamesAndValues{Name};
  if (!$This->IsSDFile($FileName)) {
    croak "Error: ${ClassName}->New: Object can't be instantiated: File, $FileName, doesn't appear to be SDF format...";
  }

  return $This;
}

# Is it a SD file?
#
# Usable as an object method, $SDFileIO->IsSDFile($FileName), or as a plain
# function, IsSDFile($FileName). Returns whatever FileUtil::CheckFileType
# reports for the extensions "sd sdf".
#
# The ($;$) prototype is kept so existing function-style callers keep parsing
# the same way; it has no effect on method calls.
#
sub IsSDFile ($;$) {
  my($FirstParameter, $SecondParameter) = @_;

  # Two arguments with an SDFileIO object first means a method call; anything
  # else is treated as a function call with the file name first.
  my $FileName = ((@_ == 2) && _IsSDFileIO($FirstParameter)) ? $SecondParameter : $FirstParameter;

  # Check file extension...
  my $Status = FileUtil::CheckFileType($FileName, "sd sdf");

  return $Status;
}

# Read molecule from file and return molecule object...
#
# Reads the next compound string via ReadMoleculeString and hands it to
# ParseMoleculeString. Returns undef when the compound string is false
# (see ParseMoleculeString).
#
sub ReadMolecule {
  my($This) = @_;

  # Capture the compound string in a scalar before passing it on: if the reader
  # ever returned an empty list, passing the call directly as an argument would
  # make ParseMoleculeString see only the object and treat it as the string.
  my $MoleculeString = $This->ReadMoleculeString();

  return $This->ParseMoleculeString($MoleculeString);
}

# Write compound data along with any data field label and values using Molecule object...
#
# Prints GenerateMoleculeString($Molecule) followed by a newline to the handle
# returned by GetFileHandle. When $Molecule is not an object for which
# IsMolecule() is true, a warning is issued and nothing is written. Returns
# the object in both cases.
#
sub WriteMolecule {
  my($This, $Molecule) = @_;

  # blessed() is checked first so that a defined non-object argument produces the
  # warning below instead of a fatal "Can't call method" error.
  if (!(Scalar::Util::blessed($Molecule) && $Molecule->IsMolecule())) {
    carp "Warning: ${ClassName}->WriteMolecule: No data written: Molecule object is not specified...";
    return $This;
  }

  my $FileHandle = $This->GetFileHandle();

  print $FileHandle $This->GenerateMoleculeString($Molecule) . "\n";

  return $This;
}

# Retrieve molecule string...
#
# Returns whatever SDFileUtil::ReadCmpdString yields for the handle returned
# by GetFileHandle.
#
sub ReadMoleculeString {
  my($This) = @_;

  my $FileHandle = $This->GetFileHandle();

  return SDFileUtil::ReadCmpdString($FileHandle);
}

# Parse molecule string and return molecule object. ParseMoleculeString supports two invocation methods: class
# method or a package function.
#
# Returns undef when the molecule string is false (undef, empty or '0') or when
# no molecule object could be created from it. Otherwise the returned molecule
# carries the SD data field labels (in file order) and the label/value hash.
#
sub ParseMoleculeString {
  my($FirstParameter, $SecondParameter) = @_;

  # Method call passes (object, string); function call passes (string).
  my $MoleculeString = ((@_ == 2) && _IsSDFileIO($FirstParameter)) ? $SecondParameter : $FirstParameter;

  if (!$MoleculeString) {
    return undef;
  }

  # Parse molecule data...
  my $Molecule = FileIO::MDLMolFileIO::ParseMoleculeString($MoleculeString);

  # NOTE(review): FileIO::MDLMolFileIO::ParseMoleculeString is not visible from this
  # file; this guard assumes it may return undef on unparsable input - confirm.
  # Without it, the method calls below would die on an undefined value.
  if (!defined($Molecule)) {
    carp "Warning: ${ClassName}->ParseMoleculeString: No molecule object created: Molecule string couldn't be parsed...";
    return undef;
  }

  # Process data label/value pairs...
  my @MoleculeLines = split /\n/, $MoleculeString;
  my @DataLabels = SDFileUtil::GetCmpdDataHeaderLabels(\@MoleculeLines);
  my %DataLabelsAndValues = SDFileUtil::GetCmpdDataHeaderLabelsAndValues(\@MoleculeLines);

  # Store reference to data labels to keep track of their initial order in SD file...
  $Molecule->SetDataFieldLabels(\@DataLabels);

  # Store reference to SD data label/value pairs hash as a generic property of molecule...
  $Molecule->SetDataFieldLabelAndValues(\%DataLabelsAndValues);

  return $Molecule;
}

# Generate molecule string using molecule object...
#
# Usable as an object method or as a package function. Returns undef when no
# molecule is supplied. Otherwise returns the CTAB text produced by
# FileIO::MDLMolFileIO::GenerateMoleculeString, followed by any data field
# lines and the "$$$$" record terminator. Data fields are sorted only when
# called on an object whose SortDataFieldsDuringOutput value matches 'Yes'.
#
sub GenerateMoleculeString {
  my($FirstParameter, $SecondParameter) = @_;
  my($This, $Molecule);

  if ((@_ == 2) && (_IsSDFileIO($FirstParameter))) {
    ($This, $Molecule) = ($FirstParameter, $SecondParameter);
  }
  else {
    $Molecule = $FirstParameter;
  }

  if (!defined($Molecule)) {
    return undef;
  }

  # Generate CTAB data...
  my $CmpdString = FileIO::MDLMolFileIO::GenerateMoleculeString($Molecule);

  # Generate any data field labels and values...
  my $DataFieldLabelsAndValuesString = '';

  if ($Molecule->HasProperty('DataFieldLabels')) {
    # $This is undef in the function-call form: check it explicitly instead of
    # relying on autovivification of an undefined reference.
    my $SortDataFields = (defined($This)
                          && defined($This->{SortDataFieldsDuringOutput})
                          && $This->{SortDataFieldsDuringOutput} =~ /^Yes$/i) ? 1 : 0;

    my $DataFieldLabelsRef = $Molecule->GetDataFieldLabels();
    my $DataFieldLabelAndValuesRef = $Molecule->GetDataFieldLabelAndValues();

    $DataFieldLabelsAndValuesString = join "\n", SDFileUtil::GenerateCmpdDataHeaderLabelsAndValuesLines($DataFieldLabelsRef, $DataFieldLabelAndValuesRef, $SortDataFields);
  }

  return "${CmpdString}\n${DataFieldLabelsAndValuesString}\n\$\$\$\$";
}


# Is it a SDFileIO object?
#
# Returns 1 for a blessed reference that isa this class, 0 for anything else.
#
sub _IsSDFileIO {
  my($Object) = @_;

  return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0;
}

1;

__END__

=head1 NAME

SDFileIO

=head1 SYNOPSIS

use FileIO::SDFileIO;

use FileIO::SDFileIO qw(:all);

=head1 DESCRIPTION

B<SDFileIO> class provides the following methods:

new, GenerateMoleculeString, IsSDFile, ParseMoleculeString, ReadMolecule,
ReadMoleculeString, WriteMolecule

The following methods can also be used as functions:

GenerateMoleculeString, IsSDFile, ParseMoleculeString

Data specific to B<SDFileIO> class not directly used by B<Molecule>, B<Atom> and
B<Bond> objects - data label/value pairs, atom StereoParity and so on - is associated to
and retrieved from appropriate objects using following methods:

    SetMDL<PropertyName>
    GetMDL<PropertyName>.

SD data label and values are attached to B<Molecule> object as a reference to a hash
using SetDataFieldLabelAndValues and can be retrieved using GetDataFieldLabelAndValues
method. The labels, in their initial order, are attached as a reference to an array
using SetDataFieldLabels.

B<SDFileIO> class is derived from I<FileIO> class and uses its methods to support
generic file related functionality.

=head2 METHODS

=over 4

=item B<new>

    $NewSDFileIO = FileIO::SDFileIO->new(%NamesAndValues);

Using specified I<SDFileIO> property names and values hash, B<new> method creates a new object
and returns a reference to newly created B<SDFileIO> object. The I<Name> property is
required and must correspond to a SD file; otherwise, object creation fails with an error.

=item B<GenerateMoleculeString>

    $MoleculeString = $SDFileIO->GenerateMoleculeString($Molecule);
    $MoleculeString = FileIO::SDFileIO::GenerateMoleculeString($Molecule);

Returns a B<MoleculeString> in SD format corresponding to I<Molecule>.

=item B<IsSDFile>

    $Status = $SDFileIO->IsSDFile($FileName);
    $Status = FileIO::SDFileIO::IsSDFile($FileName);

Returns 1 or 0 based on whether I<FileName> is a SD file.

=item B<ParseMoleculeString>

    $Molecule = $SDFileIO->ParseMoleculeString($MoleculeString);
    $Molecule = FileIO::SDFileIO::ParseMoleculeString($MoleculeString);

Parses I<MoleculeString> and returns a B<Molecule> object. SD data field label and value pairs
are associated to B<Molecule> object as a reference to a hash using:

    $Molecule->SetDataFieldLabelAndValues(\%DataLabelsAndValues)

The reference to hash can be retrieved by:

    $DataLabelsAndValues = $Molecule->GetDataFieldLabelAndValues();
    for $DataLabel (sort keys %{$DataLabelsAndValues}) {
        $DataValue = $DataLabelsAndValues->{$DataLabel};
    }

=item B<ReadMolecule>

    $Molecule = $SDFileIO->ReadMolecule();

Reads data for the next compound using the file handle associated with the B<SDFileIO>
object, creates, and returns a B<Molecule> object. No file handle argument is used by
the method.

=item B<ReadMoleculeString>

    $MoleculeString = $SDFileIO->ReadMoleculeString();

Reads data for the next compound using the file handle associated with the B<SDFileIO>
object and returns a B<MoleculeString> corresponding to compound structure and other
associated data. No file handle argument is used by the method.

=item B<WriteMolecule>

    $SDFileIO->WriteMolecule($Molecule);

Writes I<Molecule> data, along with any data field labels and values, to a file in SD
format and returns B<SDFileIO>.

=back

=head1 AUTHOR

Manish Sud <msud@san.rr.com>

=head1 SEE ALSO

MoleculeFileIO.pm, MDLMolFileIO.pm

=head1 COPYRIGHT

Copyright (C) 2015 Manish Sud. All rights reserved.

This file is part of MayaChemTools.

MayaChemTools is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.

=cut