MayaChemTools

   1 package Fingerprints::FingerprintsFileUtil;
   2 #
   3 # $RCSfile: FingerprintsFileUtil.pm,v $
   4 # $Date: 2015/02/28 20:48:54 $
   5 # $Revision: 1.14 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use Exporter;
  31 use Carp;
  32 use TextUtil ();
  33 use FileUtil ();
  34 use FileIO::FingerprintsSDFileIO;
  35 use FileIO::FingerprintsTextFileIO;
  36 use FileIO::FingerprintsFPFileIO;
  37 
  38 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  39 
  40 @ISA = qw(Exporter);
  41 @EXPORT = qw();
  42 @EXPORT_OK = qw(GetFingerprintsFileType ReadAndProcessFingerpritsData  NewFingerprintsFileIO);
  43 
  44 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  45 
  46 # Generate new FingerprintsFileIO object for a SD, FP or Text fingerprints file specified using file name
  47 # along other appropriate parameters...
  48 #
  49 sub NewFingerprintsFileIO {
  50   my(%FingerprintsFileIOParams) = @_;
  51   my($FingerprintsFileIO, $FileType);
  52 
  53   if (!(exists($FingerprintsFileIOParams{Name}) && TextUtil::IsNotEmpty($FingerprintsFileIOParams{Name}))) {
  54     carp "Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Can't create new FingerprintsFileIO object: File name is not specified...\n";
  55     return undef;
  56   }
  57 
  58   if (!(exists($FingerprintsFileIOParams{Mode}) && TextUtil::IsNotEmpty($FingerprintsFileIOParams{Mode}))) {
  59     carp "Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Can't create new FingerprintsFileIO object: File mode is not specified...\n";
  60     return undef;
  61   }
  62 
  63   $FileType = GetFingerprintsFileType($FingerprintsFileIOParams{Name});
  64   if (TextUtil::IsEmpty($FileType)) {
  65     carp "Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Can't create new FingerprintsFileIO object: File type is not specified...\n";
  66     return undef;
  67   }
  68 
  69   # Generate fingerprints IO object...
  70   FILETYPE: {
  71     if ($FileType =~ /^SD$/i) {
  72       $FingerprintsFileIO = new FileIO::FingerprintsSDFileIO(%FingerprintsFileIOParams);
  73       last FILETYPE;
  74     }
  75     if ($FileType =~ /^FP$/i) {
  76       $FingerprintsFileIO = new FileIO::FingerprintsFPFileIO(%FingerprintsFileIOParams);
  77       last FILETYPE;
  78     }
  79     if ($FileType =~ /^Text$/i) {
  80       $FingerprintsFileIO = new FileIO::FingerprintsTextFileIO(%FingerprintsFileIOParams);
  81       last FILETYPE;
  82     }
  83     $FingerprintsFileIO = undef;
  84     carp "Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Fingerprints file type, $FileType, is not valid. Supported file types: SD, FP or Text\n";
  85   }
  86 
  87   return $FingerprintsFileIO;
  88 }
  89 
  90 # Get fingerpritns file type from fingerprints file name...
  91 #
  92 sub GetFingerprintsFileType {
  93   my($FileName) = @_;
  94   my($FileType);
  95 
  96   $FileType = '';
  97   FILETYPE: {
  98     if (FileUtil::CheckFileType($FileName, "sdf sd")) {
  99       $FileType = 'SD';
 100       last FILETYPE;
 101     }
 102     if (FileUtil::CheckFileType($FileName, "fpf fp")) {
 103       $FileType = 'FP';
 104       last FILETYPE;
 105     }
 106     if (FileUtil::CheckFileType($FileName, "csv tsv")) {
 107       $FileType = 'Text';
 108       last FILETYPE;
 109     }
 110     $FileType = '';
 111     carp "Warning: Fingerprints::FingerprintsFileUtil::GetFingerprintsFileType: Can't determine fingerprints file type for $FileName: It's not a fingerprints file...\n";
 112   }
 113 
 114   return $FileType;
 115 }
 116 
 117 
 118 # Process fingerprints bit-vector and vector string data in a file using FingerprintsFileIO
 119 # object and return a references to arrays of CompoundIDs and FingerprintsObjects...
 120 #
 121 # Note:
 122 #  . The file open and close is automatically performed during processing.
 123 #
 124 sub ReadAndProcessFingerpritsData {
 125   my($FingerprintsFileIO, $CheckCompoundIDs) = @_;
 126   my($CompoundID, $FingerprintsCount, $IgnoredFingerprintsCount, @CompundIDs, @FingerprintsObjects, %UniqueCompoundIDs);
 127 
 128   if (!$FingerprintsFileIO) {
 129     return (undef, undef);
 130   }
 131   $CheckCompoundIDs = defined $CheckCompoundIDs ? $CheckCompoundIDs : 0;
 132 
 133   print "\nReading and processing fingerprints data...\n";
 134 
 135   ($FingerprintsCount, $IgnoredFingerprintsCount) = (0) x 3;
 136 
 137   @CompundIDs = ();
 138   @FingerprintsObjects = ();
 139 
 140   %UniqueCompoundIDs = ();
 141 
 142   # Check and open file for reading...
 143   if (!$FingerprintsFileIO->GetStatus()) {
 144     $FingerprintsFileIO->Open();
 145   }
 146 
 147   FINGERPRINTS: while ($FingerprintsFileIO->Read()) {
 148     $FingerprintsCount++;
 149 
 150     if (!$FingerprintsFileIO->IsFingerprintsDataValid()) {
 151       $IgnoredFingerprintsCount++;
 152       next FINGERPRINTS;
 153     }
 154 
 155     if ($CheckCompoundIDs) {
 156       $CompoundID = $FingerprintsFileIO->GetCompoundID();
 157       if (exists $UniqueCompoundIDs{$CompoundID}) {
 158         warn "Warning: Ignoring fingerprints data for compound ID $CompoundID: Multiple entries for compound ID in fingerprints file.\n";
 159         $IgnoredFingerprintsCount++;
 160         next FINGERPRINTS;
 161       }
 162       $UniqueCompoundIDs{$CompoundID} = $CompoundID;
 163     }
 164 
 165     push @FingerprintsObjects, $FingerprintsFileIO->GetFingerprints();
 166     push @CompundIDs, $FingerprintsFileIO->GetCompoundID();
 167   }
 168   $FingerprintsFileIO->Close();
 169 
 170   print "Number of fingerprints data entries: $FingerprintsCount\n";
 171   print "Number of fingerprints date entries processed successfully: ", ($FingerprintsCount - $IgnoredFingerprintsCount)  , "\n";
 172   print "Number of fingerprints data entries ignored due to missing/invalid data: $IgnoredFingerprintsCount\n\n";
 173 
 174   return (\@CompundIDs, \@FingerprintsObjects);
 175 }
 176 
 177