package Fingerprints::FingerprintsFileUtil;
#
# $RCSfile: FingerprintsFileUtil.pm,v $
# $Date: 2015/02/28 20:48:54 $
# $Revision: 1.14 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2015 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use warnings;
use Exporter;
use Carp;
use TextUtil ();
use FileUtil ();
use FileIO::FingerprintsSDFileIO;
use FileIO::FingerprintsTextFileIO;
use FileIO::FingerprintsFPFileIO;

# Exporter configuration. Nothing is exported by default; callers import the
# functions by name or all of them through the ":all" tag.
#
# Note: "ReadAndProcessFingerpritsData" is misspelled, but it is the name of
# the sub defined in this module and is part of the exported interface, so it
# must stay as is.
#
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

@ISA = qw(Exporter);
@EXPORT = qw();
@EXPORT_OK = qw(GetFingerprintsFileType ReadAndProcessFingerpritsData NewFingerprintsFileIO);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Generate new FingerprintsFileIO object for a SD, FP or Text fingerprints file specified using file name
# along with other appropriate parameters...
#
# NewFingerprintsFileIO: create a file-type specific fingerprints FileIO object.
#
# Parameters:
#   A flat list of name/value pairs. Name (the fingerprints file name) and
#   Mode must both be present and non-empty. The complete list is passed
#   unchanged to the constructor of the class selected from the file type.
#
# Returns:
#   The object returned by FileIO::FingerprintsSDFileIO->new,
#   FileIO::FingerprintsFPFileIO->new or FileIO::FingerprintsTextFileIO->new;
#   or undef, after a carp warning, when Name or Mode is missing or the file
#   type can't be determined from the file name.
#
# Note:
#   . Failures use "return undef" rather than a bare "return" on purpose: the
#     final return also yields a single undef element in list context, and
#     existing callers may rely on that.
#
sub NewFingerprintsFileIO {
  my(%FingerprintsFileIOParams) = @_;
  my($FingerprintsFileIO, $FileType);

  # File name is mandatory: the file type is derived from it...
  if (!(exists($FingerprintsFileIOParams{Name}) && TextUtil::IsNotEmpty($FingerprintsFileIOParams{Name}))) {
    carp "Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Can't create new FingerprintsFileIO object: File name is not specified...\n";
    return undef;
  }

  # File mode is mandatory as well...
  if (!(exists($FingerprintsFileIOParams{Mode}) && TextUtil::IsNotEmpty($FingerprintsFileIOParams{Mode}))) {
    carp "Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Can't create new FingerprintsFileIO object: File mode is not specified...\n";
    return undef;
  }

  $FileType = GetFingerprintsFileType($FingerprintsFileIOParams{Name});
  if (TextUtil::IsEmpty($FileType)) {
    carp "Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Can't create new FingerprintsFileIO object: File type is not specified...\n";
    return undef;
  }

  # Generate fingerprints IO object. Constructors are invoked with the arrow
  # form instead of the indirect object form ("new Class(...)"), whose parsing
  # depends on which subs happen to be visible at compile time...
  if ($FileType =~ /^SD$/i) {
    $FingerprintsFileIO = FileIO::FingerprintsSDFileIO->new(%FingerprintsFileIOParams);
  }
  elsif ($FileType =~ /^FP$/i) {
    $FingerprintsFileIO = FileIO::FingerprintsFPFileIO->new(%FingerprintsFileIOParams);
  }
  elsif ($FileType =~ /^Text$/i) {
    $FingerprintsFileIO = FileIO::FingerprintsTextFileIO->new(%FingerprintsFileIOParams);
  }
  else {
    # Defensive: reached only for a file type other than SD, FP or Text...
    $FingerprintsFileIO = undef;
    carp "Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Fingerprints file type, $FileType, is not valid. Supported file types: SD, FP or Text\n";
  }

  return $FingerprintsFileIO;
}

# Get fingerprints file type from fingerprints file name...
#
# GetFingerprintsFileType: map a fingerprints file name to a file type name.
#
# Parameters:
#   $FileName - name of the fingerprints file.
#
# Returns:
#   'SD', 'FP' or 'Text' for the first FileUtil::CheckFileType test that
#   succeeds against "sdf sd", "fpf fp" and "csv tsv" respectively (presumably
#   space delimited file extensions - confirm against FileUtil); otherwise an
#   empty string, after a carp warning.
#
sub GetFingerprintsFileType {
  my($FileName) = @_;
  my($FileType);

  if (FileUtil::CheckFileType($FileName, "sdf sd")) {
    $FileType = 'SD';
  }
  elsif (FileUtil::CheckFileType($FileName, "fpf fp")) {
    $FileType = 'FP';
  }
  elsif (FileUtil::CheckFileType($FileName, "csv tsv")) {
    $FileType = 'Text';
  }
  else {
    $FileType = '';
    carp "Warning: Fingerprints::FingerprintsFileUtil::GetFingerprintsFileType: Can't determine fingerprints file type for $FileName: It's not a fingerprints file...\n";
  }

  return $FileType;
}


# Process fingerprints bit-vector and vector string data in a file using FingerprintsFileIO
# object and return references to arrays of CompoundIDs and FingerprintsObjects...
#
# Parameters:
#   $FingerprintsFileIO - object providing GetStatus, Open, Read, Close,
#                         IsFingerprintsDataValid, GetCompoundID and
#                         GetFingerprints methods.
#   $CheckCompoundIDs   - optional flag, default 0. When true, only the first
#                         entry for each compound ID is kept; later entries
#                         are reported with a warning and counted as ignored.
#
# Returns:
#   (\@CompoundIDs, \@FingerprintsObjects) for the entries that were kept, or
#   (undef, undef) when no FingerprintsFileIO object is supplied.
#
# Note:
#   . The file open and close is automatically performed during processing.
#   . Progress and summary messages are printed using print.
#
sub ReadAndProcessFingerpritsData {
  my($FingerprintsFileIO, $CheckCompoundIDs) = @_;
  my($CompoundID, $FingerprintsCount, $IgnoredFingerprintsCount, @CompoundIDs, @FingerprintsObjects, %UniqueCompoundIDs);

  if (!$FingerprintsFileIO) {
    return (undef, undef);
  }
  $CheckCompoundIDs = defined $CheckCompoundIDs ? $CheckCompoundIDs : 0;

  print "\nReading and processing fingerprints data...\n";

  ($FingerprintsCount, $IgnoredFingerprintsCount) = (0, 0);

  @CompoundIDs = ();
  @FingerprintsObjects = ();

  %UniqueCompoundIDs = ();

  # Check and open file for reading...
  if (!$FingerprintsFileIO->GetStatus()) {
    $FingerprintsFileIO->Open();
  }

  FINGERPRINTS: while ($FingerprintsFileIO->Read()) {
    $FingerprintsCount++;

    if (!$FingerprintsFileIO->IsFingerprintsDataValid()) {
      $IgnoredFingerprintsCount++;
      next FINGERPRINTS;
    }

    # Retrieve compound ID once, in scalar context, so that exactly one ID is
    # stored for each entry that is kept...
    $CompoundID = $FingerprintsFileIO->GetCompoundID();

    if ($CheckCompoundIDs) {
      if (exists $UniqueCompoundIDs{$CompoundID}) {
        warn "Warning: Ignoring fingerprints data for compound ID $CompoundID: Multiple entries for compound ID in fingerprints file.\n";
        $IgnoredFingerprintsCount++;
        next FINGERPRINTS;
      }
      $UniqueCompoundIDs{$CompoundID} = $CompoundID;
    }

    push @FingerprintsObjects, $FingerprintsFileIO->GetFingerprints();
    push @CompoundIDs, $CompoundID;
  }
  $FingerprintsFileIO->Close();

  # Ignored count covers both invalid entries and duplicate compound IDs...
  print "Number of fingerprints data entries: $FingerprintsCount\n";
  print "Number of fingerprints data entries processed successfully: ", ($FingerprintsCount - $IgnoredFingerprintsCount) , "\n";
  print "Number of fingerprints data entries ignored due to missing/invalid data: $IgnoredFingerprintsCount\n\n";

  return (\@CompoundIDs, \@FingerprintsObjects);
}
