comparison lib/Fingerprints/FingerprintsFileUtil.pm @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 package Fingerprints::FingerprintsFileUtil;
2 #
3 # $RCSfile: FingerprintsFileUtil.pm,v $
4 # $Date: 2015/02/28 20:48:54 $
5 # $Revision: 1.14 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use Exporter;
31 use Carp;
32 use TextUtil ();
33 use FileUtil ();
34 use FileIO::FingerprintsSDFileIO;
35 use FileIO::FingerprintsTextFileIO;
36 use FileIO::FingerprintsFPFileIO;
37
# Exporter configuration. Nothing is exported by default; the three public
# functions are importable by name or all together through the ":all" tag.
#
# The package variables are declared with "our" instead of the obsolete
# "use vars" pragma; Exporter still finds them as package globals.
our @ISA = qw(Exporter);
our @EXPORT = qw();
our @EXPORT_OK = qw(GetFingerprintsFileType ReadAndProcessFingerpritsData NewFingerprintsFileIO);

our %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);
45
# Generate a new FingerprintsFileIO object for a SD, FP or Text fingerprints
# file.
#
# Parameters:
#   %FingerprintsFileIOParams - name/value pairs passed unchanged to the new
#       method of the selected IO class. The Name and Mode keys must be
#       present with non-empty values.
#
# Returns:
#   The value returned by the new method of FileIO::FingerprintsSDFileIO,
#   FileIO::FingerprintsFPFileIO or FileIO::FingerprintsTextFileIO, chosen
#   by the file type that GetFingerprintsFileType reports for Name.
#   Returns undef, after a carp warning, when Name or Mode is missing or the
#   file type is empty or not one of SD, FP or Text.
#
sub NewFingerprintsFileIO {
    my(%FingerprintsFileIOParams) = @_;
    my($FingerprintsFileIO, $FileType);

    # Failure paths deliberately return an explicit undef (not a bare
    # return) so that list-context callers keep receiving a single element,
    # just like the final "return $FingerprintsFileIO" does.
    if (!(exists($FingerprintsFileIOParams{Name}) && TextUtil::IsNotEmpty($FingerprintsFileIOParams{Name}))) {
        carp "Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Can't create new FingerprintsFileIO object: File name is not specified...\n";
        return undef;
    }

    if (!(exists($FingerprintsFileIOParams{Mode}) && TextUtil::IsNotEmpty($FingerprintsFileIOParams{Mode}))) {
        carp "Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Can't create new FingerprintsFileIO object: File mode is not specified...\n";
        return undef;
    }

    $FileType = GetFingerprintsFileType($FingerprintsFileIOParams{Name});
    if (TextUtil::IsEmpty($FileType)) {
        carp "Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Can't create new FingerprintsFileIO object: File type is not specified...\n";
        return undef;
    }

    # Generate fingerprints IO object. Constructors are invoked with the
    # arrow form rather than indirect object syntax ("new Class(...)"),
    # which Perl can misparse when a sub named "new" is visible in the
    # current package.
    if ($FileType =~ /^SD$/i) {
        $FingerprintsFileIO = FileIO::FingerprintsSDFileIO->new(%FingerprintsFileIOParams);
    }
    elsif ($FileType =~ /^FP$/i) {
        $FingerprintsFileIO = FileIO::FingerprintsFPFileIO->new(%FingerprintsFileIOParams);
    }
    elsif ($FileType =~ /^Text$/i) {
        $FingerprintsFileIO = FileIO::FingerprintsTextFileIO->new(%FingerprintsFileIOParams);
    }
    else {
        $FingerprintsFileIO = undef;
        carp "Warning: Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO: Fingerprints file type, $FileType, is not valid. Supported file types: SD, FP or Text\n";
    }

    return $FingerprintsFileIO;
}
89
# Get fingerprints file type from a fingerprints file name.
#
# The file name is offered to FileUtil::CheckFileType together with the
# extension list of each supported type, in the order SD ("sdf sd"),
# FP ("fpf fp"), Text ("csv tsv"); the first type whose check succeeds is
# returned. When none succeeds, a warning is issued through carp and an
# empty string is returned.
#
sub GetFingerprintsFileType {
    my($FileName) = @_;

    for my $Candidate (['SD', "sdf sd"], ['FP', "fpf fp"], ['Text', "csv tsv"]) {
        my($Type, $Extensions) = @{$Candidate};
        return $Type if FileUtil::CheckFileType($FileName, $Extensions);
    }

    carp "Warning: Fingerprints::FingerprintsFileUtil::GetFingerprintsFileType: Can't determine fingerprints file type for $FileName: It's not a fingerprints file...\n";

    return '';
}
116
117
# Process fingerprints bit-vector and vector string data in a file using a
# FingerprintsFileIO object and return references to arrays of compound IDs
# and fingerprints objects.
#
# Parameters:
#   $FingerprintsFileIO - object providing GetStatus, Open, Read,
#       IsFingerprintsDataValid, GetCompoundID, GetFingerprints and Close
#       methods.
#   $CheckCompoundIDs   - optional flag, default 0. When true, an entry whose
#       compound ID has already been seen is skipped with a warning and
#       counted as ignored.
#
# Returns:
#   (\@CompoundIDs, \@FingerprintsObjects) holding, in file order, the values
#   of GetCompoundID and GetFingerprints for every accepted entry; or
#   (undef, undef) when no FingerprintsFileIO object is supplied.
#
# Note:
#   . The file is opened when GetStatus reports false and is always closed
#     after the last entry has been read.
#   . Progress and summary counts are printed to the currently selected
#     output handle.
#
sub ReadAndProcessFingerpritsData {
    my($FingerprintsFileIO, $CheckCompoundIDs) = @_;

    if (!$FingerprintsFileIO) {
        return (undef, undef);
    }
    $CheckCompoundIDs = defined $CheckCompoundIDs ? $CheckCompoundIDs : 0;

    print "\nReading and processing fingerprints data...\n";

    # One initial value per counter; fresh lexical arrays and hashes start
    # out empty, so they need no explicit reset.
    my($FingerprintsCount, $IgnoredFingerprintsCount) = (0, 0);
    my($CompoundID, @CompoundIDs, @FingerprintsObjects, %UniqueCompoundIDs);

    # Check and open file for reading...
    if (!$FingerprintsFileIO->GetStatus()) {
        $FingerprintsFileIO->Open();
    }

    FINGERPRINTS: while ($FingerprintsFileIO->Read()) {
        $FingerprintsCount++;

        if (!$FingerprintsFileIO->IsFingerprintsDataValid()) {
            $IgnoredFingerprintsCount++;
            next FINGERPRINTS;
        }

        if ($CheckCompoundIDs) {
            # Only the first entry for a compound ID is kept.
            $CompoundID = $FingerprintsFileIO->GetCompoundID();
            if (exists $UniqueCompoundIDs{$CompoundID}) {
                warn "Warning: Ignoring fingerprints data for compound ID $CompoundID: Multiple entries for compound ID in fingerprints file.\n";
                $IgnoredFingerprintsCount++;
                next FINGERPRINTS;
            }
            $UniqueCompoundIDs{$CompoundID} = $CompoundID;
        }

        push @FingerprintsObjects, $FingerprintsFileIO->GetFingerprints();
        push @CompoundIDs, $FingerprintsFileIO->GetCompoundID();
    }
    $FingerprintsFileIO->Close();

    print "Number of fingerprints data entries: $FingerprintsCount\n";
    print "Number of fingerprints date entries processed successfully: ", ($FingerprintsCount - $IgnoredFingerprintsCount) , "\n";
    print "Number of fingerprints data entries ignored due to missing/invalid data: $IgnoredFingerprintsCount\n\n";

    return (\@CompoundIDs, \@FingerprintsObjects);
}
176
177
178 1;
179
180 __END__
181
182 =head1 NAME
183
184 FingerprintsFileUtil
185
186 =head1 SYNOPSIS
187
188 use Fingerprints::FingerprintsFileUtil;
189
190 use Fingerprints::FingerprintsFileUtil qw(:all);
191
192 =head1 DESCRIPTION
193
194 B<FingerprintsFileUtil> module provides the following functions:
195
196 GetFingerprintsFileType, NewFingerprintsFileIO, ReadAndProcessFingerpritsData
197
198 B<FingerprintsFileUtil> module provides functions to handle fingerprints data strings
199 in FP, SD and CSV/TSV text files. The data is present in one of the following two types:
200 fingerprints bit-vector strings and fingerprints vector strings.
201
202 Example of B<FP> file format containing fingerprints bit-vector string data:
203
204 #
205 # Package = MayaChemTools 7.4
206 # ReleaseDate = Oct 21, 2010
207 #
208 # TimeStamp = Mon Mar 7 15:14:01 2011
209 #
210 # FingerprintsStringType = FingerprintsBitVector
211 #
212 # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:...
213 # Size = 1024
214 # BitStringFormat = HexadecimalString
215 # BitsOrder = Ascending
216 #
217 Cmpd1 9c8460989ec8a49913991a6603130b0a19e8051c89184414953800cc21510...
218 Cmpd2 000000249400840040100042011001001980410c000000001010088001120...
219 ... ...
220 ... ..
221
222 Example of B<FP> file format containing fingerprints vector string data:
223
224 #
225 # Package = MayaChemTools 7.4
226 # ReleaseDate = Oct 21, 2010
227 #
228 # TimeStamp = Mon Mar 7 15:14:01 2011
229 #
230 # FingerprintsStringType = FingerprintsVector
231 #
232 # Description = PathLengthBits:AtomicInvariantsAtomTypes:MinLength1:...
233 # VectorStringFormat = IDsAndValuesString
234 # VectorValuesType = NumericalValues
235 #
236 Cmpd1 338;C F N O C:C C:N C=O CC CF CN CO C:C:C C:C:N C:CC C:CF C:CN C:
237 N:C C:NC CC:N CC=O CCC CCN CCO CNC NC=O O=CO C:C:C:C C:C:C:N C:C:CC...;
238 33 1 2 5 21 2 2 12 1 3 3 20 2 10 2 2 1 2 2 2 8 2 5 1 1 1 19 2 8 2 2 2 2
239 6 2 2 2 2 2 2 2 2 3 2 2 1 4 1 5 1 1 18 6 2 2 1 2 10 2 1 2 1 2 2 2 2 ...
240 Cmpd2 103;C N O C=N C=O CC CN CO CC=O CCC CCN CCO CNC N=CN NC=O NCN O=C
241 O C CC=O CCCC CCCN CCCO CCNC CNC=N CNC=O CNCN CCCC=O CCCCC CCCCN CC...;
242 15 4 4 1 2 13 5 2 2 15 5 3 2 2 1 1 1 2 17 7 6 5 1 1 1 2 15 8 5 7 2 2 2 2
243 1 2 1 1 3 15 7 6 8 3 4 4 3 2 2 1 2 3 14 2 4 7 4 4 4 4 1 1 1 2 1 1 1 ...
244 ... ...
245 ... ...
246
247 Example of B<SD> file format containing fingerprints bit-vector string data:
248
249 ... ...
250 ... ...
251 $$$$
252 ... ...
253 ... ...
254 ... ...
255 41 44 0 0 0 0 0 0 0 0999 V2000
256 -3.3652 1.4499 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
257 ... ...
258 2 3 1 0 0 0 0
259 ... ...
260 M END
261 > <CmpdID>
262 Test
263
264 > <PathLengthFingerprints>
265 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLengt
266 h1:MaxLength8;1024;HexadecimalString;Ascending;9c8460989ec8a49913991a66
267 03130b0a19e8051c89184414953800cc2151082844a201042800130860308e8204d4028
268 00831048940e44281c00060449a5000ac80c894114e006321264401600846c050164462
269 08190410805000304a10205b0100e04c0038ba0fad0209c0ca8b1200012268b61c0026a
270 aa0660a11014a011d46
271
272 $$$$
273 ... ...
274 ... ...
275
276 Example of CSV B<text> file format containing fingerprints bit-vector string data:
277
278 "CompoundID","PathLengthFingerprints"
279 "Cmpd1","FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes
280 :MinLength1:MaxLength8;1024;HexadecimalString;Ascending;9c8460989ec8a4
281 9913991a6603130b0a19e8051c89184414953800cc2151082844a20104280013086030
282 8e8204d402800831048940e44281c00060449a5000ac80c894114e006321264401..."
283 ... ...
284 ... ...
285
286 The current release of MayaChemTools supports the following types of fingerprint
287 bit-vector and vector strings:
288
289 FingerprintsVector;AtomNeighborhoods:AtomicInvariantsAtomTypes:MinRadi
290 us0:MaxRadius2;41;AlphaNumericalValues;ValuesString;NR0-C.X1.BO1.H3-AT
291 C1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-ATC1 NR0-C.X
292 1.BO1.H3-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2-C.X1.BO1.H3-ATC1:NR2-C.X3.BO4-A
293 TC1 NR0-C.X2.BO2.H2-ATC1:NR1-C.X2.BO2.H2-ATC1:NR1-C.X3.BO3.H1-ATC1:NR2
294 -C.X2.BO2.H2-ATC1:NR2-N.X3.BO3-ATC1:NR2-O.X1.BO1.H1-ATC1 NR0-C.X2.B...
295
296 FingerprintsVector;AtomTypesCount:AtomicInvariantsAtomTypes:ArbitraryS
297 ize;10;NumericalValues;IDsAndValuesString;C.X1.BO1.H3 C.X2.BO2.H2 C.X2
298 .BO3.H1 C.X3.BO3.H1 C.X3.BO4 F.X1.BO1 N.X2.BO2.H1 N.X3.BO3 O.X1.BO1.H1
299 O.X1.BO2;2 4 14 3 10 1 1 1 3 2
300
301 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:ArbitrarySize;16;Nume
302 ricalValues;IDsAndValuesString;C1 C10 C11 C14 C18 C20 C21 C22 C5 CS F
303 N11 N4 O10 O2 O9;5 1 1 1 14 4 2 1 2 2 1 1 1 1 3 1
304
305 FingerprintsVector;AtomTypesCount:SLogPAtomTypes:FixedSize;67;OrderedN
306 umericalValues;IDsAndValuesString;C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11 C
307 12 C13 C14 C15 C16 C17 C18 C19 C20 C21 C22 C23 C24 C25 C26 C27 CS N1 N
308 2 N3 N4 N5 N6 N7 N8 N9 N10 N11 N12 N13 N14 NS O1 O2 O3 O4 O5 O6 O7 O8
309 O9 O10 O11 O12 OS F Cl Br I Hal P S1 S2 S3 Me1 Me2;5 0 0 0 2 0 0 0 0 1
310 1 0 0 1 0 0 0 14 0 4 2 1 0 0 0 0 0 2 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0...
311
312 FingerprintsVector;EStateIndicies:ArbitrarySize;11;NumericalValues;IDs
313 AndValuesString;SaaCH SaasC SaasN SdO SdssC SsCH3 SsF SsOH SssCH2 SssN
314 H SsssCH;24.778 4.387 1.993 25.023 -1.435 3.975 14.006 29.759 -0.073 3
315 .024 -2.270
316
317 FingerprintsVector;EStateIndicies:FixedSize;87;OrderedNumericalValues;
318 ValuesString;0 0 0 0 0 0 0 3.975 0 -0.073 0 0 24.778 -2.270 0 0 -1.435
319 4.387 0 0 0 0 0 0 3.024 0 0 0 0 0 0 0 1.993 0 29.759 25.023 0 0 0 0 1
320 4.006 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
321 0 0 0 0 0 0 0 0 0 0 0 0 0 0
322
323 FingerprintsVector;ExtendedConnectivity:AtomicInvariantsAtomTypes:Radi
324 us2;60;AlphaNumericalValues;ValuesString;73555770 333564680 352413391
325 666191900 1001270906 1371674323 1481469939 1977749791 2006158649 21414
326 08799 49532520 64643108 79385615 96062769 273726379 564565671 85514103
327 5 906706094 988546669 1018231313 1032696425 1197507444 1331250018 1338
328 532734 1455473691 1607485225 1609687129 1631614296 1670251330 17303...
329
330 FingerprintsVector;ExtendedConnectivityCount:AtomicInvariantsAtomTypes
331 :Radius2;60;NumericalValues;IDsAndValuesString;73555770 333564680 3524
332 13391 666191900 1001270906 1371674323 1481469939 1977749791 2006158649
333 2141408799 49532520 64643108 79385615 96062769 273726379 564565671...;
334 3 2 1 1 14 1 2 10 4 3 1 1 1 1 2 1 2 1 1 1 2 3 1 1 2 1 3 3 8 2 2 2 6 2
335 1 2 1 1 2 1 1 1 2 1 1 2 1 2 1 1 1 1 1 1 1 1 1 2 1 1
336
337 FingerprintsBitVector;ExtendedConnectivityBits:AtomicInvariantsAtomTyp
338 es:Radius2;1024;BinaryString;Ascending;0000000000000000000000000000100
339 0000000001010000000110000011000000000000100000000000000000000000100001
340 1000000110000000000000000000000000010011000000000000000000000000010000
341 0000000000000000000000000010000000000000000001000000000000000000000000
342 0000000000010000100001000000000000101000000000000000100000000000000...
343
344 FingerprintsVector;ExtendedConnectivity:FunctionalClassAtomTypes:Radiu
345 s2;57;AlphaNumericalValues;ValuesString;24769214 508787397 850393286 8
346 62102353 981185303 1231636850 1649386610 1941540674 263599683 32920567
347 1 571109041 639579325 683993318 723853089 810600886 885767127 90326012
348 7 958841485 981022393 1126908698 1152248391 1317567065 1421489994 1455
349 632544 1557272891 1826413669 1983319256 2015750777 2029559552 20404...
350
351 FingerprintsVector;ExtendedConnectivity:EStateAtomTypes:Radius2;62;Alp
352 haNumericalValues;ValuesString;25189973 528584866 662581668 671034184
353 926543080 1347067490 1738510057 1759600920 2034425745 2097234755 21450
354 44754 96779665 180364292 341712110 345278822 386540408 387387308 50430
355 1706 617094135 771528807 957666640 997798220 1158349170 1291258082 134
356 1138533 1395329837 1420277211 1479584608 1486476397 1487556246 1566...
357
358 FingerprintsBitVector;MACCSKeyBits;166;BinaryString;Ascending;00000000
359 0000000000000000000000000000000001001000010010000000010010000000011100
360 0100101010111100011011000100110110000011011110100110111111111111011111
361 11111111111110111000
362
363 FingerprintsBitVector;MACCSKeyBits;322;BinaryString;Ascending;11101011
364 1110011111100101111111000111101100110000000000000011100010000000000000
365 0000000000000000000000000000000000000000000000101000000000000000000000
366 0000000000000000000000000000000000000000000000000000000000000000000000
367 0000000000000000000000000000000000000011000000000000000000000000000000
368 0000000000000000000000000000000000000000
369
370 FingerprintsVector;MACCSKeyCount;166;OrderedNumericalValues;ValuesStri
371 ng;0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
372 0 0 0 0 0 0 0 1 0 0 3 0 0 0 0 4 0 0 2 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0
373 0 0 0 0 1 1 8 0 0 0 1 0 0 1 0 1 0 1 0 3 1 3 1 0 0 0 1 2 0 11 1 0 0 0
374 5 0 0 1 2 0 1 1 0 0 0 0 0 1 1 0 1 1 1 1 0 4 0 0 1 1 0 4 6 1 1 1 2 1 1
375 3 5 2 2 0 5 3 5 1 1 2 5 1 2 1 2 4 8 3 5 5 2 2 0 3 5 4 1
376
377 FingerprintsVector;MACCSKeyCount;322;OrderedNumericalValues;ValuesStri
378 ng;14 8 2 0 2 0 4 4 2 1 4 0 0 2 5 10 5 2 1 0 0 2 0 5 13 3 28 5 5 3 0 0
379 0 4 2 1 1 0 1 1 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 22 5 3 0 0 0 1 0
380 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
381 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 0 2 0 0 0 0 0 0 0 0 0
382 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
383
384 FingerprintsBitVector;PathLengthBits:AtomicInvariantsAtomTypes:MinLeng
385 th1:MaxLength8;1024;BinaryString;Ascending;001000010011010101011000110
386 0100010101011000101001011100110001000010001001101000001001001001001000
387 0010110100000111001001000001001010100100100000000011000000101001011100
388 0010000001000101010100000100111100110111011011011000000010110111001101
389 0101100011000000010001000011000010100011101100001000001000100000000...
390
391 FingerprintsVector;PathLengthCount:AtomicInvariantsAtomTypes:MinLength
392 1:MaxLength8;432;NumericalValues;IDsAndValuesPairsString;C.X1.BO1.H3 2
393 C.X2.BO2.H2 4 C.X2.BO3.H1 14 C.X3.BO3.H1 3 C.X3.BO4 10 F.X1.BO1 1 N.X
394 2.BO2.H1 1 N.X3.BO3 1 O.X1.BO1.H1 3 O.X1.BO2 2 C.X1.BO1.H3C.X3.BO3.H1
395 2 C.X2.BO2.H2C.X2.BO2.H2 1 C.X2.BO2.H2C.X3.BO3.H1 4 C.X2.BO2.H2C.X3.BO
396 4 1 C.X2.BO2.H2N.X3.BO3 1 C.X2.BO3.H1:C.X2.BO3.H1 10 C.X2.BO3.H1:C....
397
398 FingerprintsVector;PathLengthCount:MMFF94AtomTypes:MinLength1:MaxLengt
399 h8;463;NumericalValues;IDsAndValuesPairsString;C5A 2 C5B 2 C=ON 1 CB 1
400 8 COO 1 CR 9 F 1 N5 1 NC=O 1 O=CN 1 O=CO 1 OC=O 1 OR 2 C5A:C5B 2 C5A:N
401 5 2 C5ACB 1 C5ACR 1 C5B:C5B 1 C5BC=ON 1 C5BCB 1 C=ON=O=CN 1 C=ONNC=O 1
402 CB:CB 18 CBF 1 CBNC=O 1 COO=O=CO 1 COOCR 1 COOOC=O 1 CRCR 7 CRN5 1 CR
403 OR 2 C5A:C5B:C5B 2 C5A:C5BC=ON 1 C5A:C5BCB 1 C5A:N5:C5A 1 C5A:N5CR ...
404
405 FingerprintsVector;TopologicalAtomPairs:AtomicInvariantsAtomTypes:MinD
406 istance1:MaxDistance10;223;NumericalValues;IDsAndValuesString;C.X1.BO1
407 .H3-D1-C.X3.BO3.H1 C.X2.BO2.H2-D1-C.X2.BO2.H2 C.X2.BO2.H2-D1-C.X3.BO3.
408 H1 C.X2.BO2.H2-D1-C.X3.BO4 C.X2.BO2.H2-D1-N.X3.BO3 C.X2.BO3.H1-D1-...;
409 2 1 4 1 1 10 8 1 2 6 1 2 2 1 2 1 2 2 1 2 1 5 1 10 12 2 2 1 2 1 9 1 3 1
410 1 1 2 2 1 3 6 1 6 14 2 2 2 3 1 3 1 8 2 2 1 3 2 6 1 2 2 5 1 3 1 23 1...
411
412 FingerprintsVector;TopologicalAtomPairs:FunctionalClassAtomTypes:MinDi
413 stance1:MaxDistance10;144;NumericalValues;IDsAndValuesString;Ar-D1-Ar
414 Ar-D1-Ar.HBA Ar-D1-HBD Ar-D1-Hal Ar-D1-None Ar.HBA-D1-None HBA-D1-NI H
415 BA-D1-None HBA.HBD-D1-NI HBA.HBD-D1-None HBD-D1-None NI-D1-None No...;
416 23 2 1 1 2 1 1 1 1 2 1 1 7 28 3 1 3 2 8 2 1 1 1 5 1 5 24 3 3 4 2 13 4
417 1 1 4 1 5 22 4 4 3 1 19 1 1 1 1 1 2 2 3 1 1 8 25 4 5 2 3 1 26 1 4 1 ...
418
419 FingerprintsVector;TopologicalAtomTorsions:AtomicInvariantsAtomTypes;3
420 3;NumericalValues;IDsAndValuesString;C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-
421 C.X3.BO4 C.X1.BO1.H3-C.X3.BO3.H1-C.X3.BO4-N.X3.BO3 C.X2.BO2.H2-C.X2.BO
422 2.H2-C.X3.BO3.H1-C.X2.BO2.H2 C.X2.BO2.H2-C.X2.BO2.H2-C.X3.BO3.H1-O...;
423 2 2 1 1 2 2 1 1 3 4 4 8 4 2 2 6 2 2 1 2 1 1 2 1 1 2 6 2 4 2 1 3 1
424
425 FingerprintsVector;TopologicalAtomTorsions:EStateAtomTypes;36;Numerica
426 lValues;IDsAndValuesString;aaCH-aaCH-aaCH-aaCH aaCH-aaCH-aaCH-aasC aaC
427 H-aaCH-aasC-aaCH aaCH-aaCH-aasC-aasC aaCH-aaCH-aasC-sF aaCH-aaCH-aasC-
428 ssNH aaCH-aasC-aasC-aasC aaCH-aasC-aasC-aasN aaCH-aasC-ssNH-dssC a...;
429 4 4 8 4 2 2 6 2 2 2 4 3 2 1 3 3 2 2 2 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1 1 2
430
431 FingerprintsVector;TopologicalAtomTriplets:AtomicInvariantsAtomTypes:M
432 inDistance1:MaxDistance10;3096;NumericalValues;IDsAndValuesString;C.X1
433 .BO1.H3-D1-C.X1.BO1.H3-D1-C.X3.BO3.H1-D2 C.X1.BO1.H3-D1-C.X2.BO2.H2-D1
434 0-C.X3.BO4-D9 C.X1.BO1.H3-D1-C.X2.BO2.H2-D3-N.X3.BO3-D4 C.X1.BO1.H3-D1
435 -C.X2.BO2.H2-D4-C.X2.BO2.H2-D5 C.X1.BO1.H3-D1-C.X2.BO2.H2-D6-C.X3....;
436 1 2 2 2 2 2 2 2 8 8 4 8 4 4 2 2 2 2 4 2 2 2 4 2 2 2 2 1 2 2 4 4 4 2 2
437 2 4 4 4 8 4 4 2 4 4 4 2 4 4 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 8...
438
439 FingerprintsVector;TopologicalAtomTriplets:SYBYLAtomTypes:MinDistance1
440 :MaxDistance10;2332;NumericalValues;IDsAndValuesString;C.2-D1-C.2-D9-C
441 .3-D10 C.2-D1-C.2-D9-C.ar-D10 C.2-D1-C.3-D1-C.3-D2 C.2-D1-C.3-D10-C.3-
442 D9 C.2-D1-C.3-D2-C.3-D3 C.2-D1-C.3-D2-C.ar-D3 C.2-D1-C.3-D3-C.3-D4 C.2
443 -D1-C.3-D3-N.ar-D4 C.2-D1-C.3-D3-O.3-D2 C.2-D1-C.3-D4-C.3-D5 C.2-D1-C.
444 3-D5-C.3-D6 C.2-D1-C.3-D5-O.3-D4 C.2-D1-C.3-D6-C.3-D7 C.2-D1-C.3-D7...
445
446 FingerprintsVector;TopologicalPharmacophoreAtomPairs:ArbitrarySize:Min
447 Distance1:MaxDistance10;54;NumericalValues;IDsAndValuesString;H-D1-H H
448 -D1-NI HBA-D1-NI HBD-D1-NI H-D2-H H-D2-HBA H-D2-HBD HBA-D2-HBA HBA-D2-
449 HBD H-D3-H H-D3-HBA H-D3-HBD H-D3-NI HBA-D3-NI HBD-D3-NI H-D4-H H-D4-H
450 BA H-D4-HBD HBA-D4-HBA HBA-D4-HBD HBD-D4-HBD H-D5-H H-D5-HBA H-D5-...;
451 18 1 2 1 22 12 8 1 2 18 6 3 1 1 1 22 13 6 5 7 2 28 9 5 1 1 1 36 16 10
452 3 4 1 37 10 8 1 35 10 9 3 3 1 28 7 7 4 18 16 12 5 1 2 1
453
454 FingerprintsVector;TopologicalPharmacophoreAtomPairs:FixedSize:MinDist
455 ance1:MaxDistance10;150;OrderedNumericalValues;ValuesString;18 0 0 1 0
456 0 0 2 0 0 1 0 0 0 0 22 12 8 0 0 1 2 0 0 0 0 0 0 0 0 18 6 3 1 0 0 0 1
457 0 0 1 0 0 0 0 22 13 6 0 0 5 7 0 0 2 0 0 0 0 0 28 9 5 1 0 0 0 1 0 0 1 0
458 0 0 0 36 16 10 0 0 3 4 0 0 1 0 0 0 0 0 37 10 8 0 0 0 0 1 0 0 0 0 0 0
459 0 35 10 9 0 0 3 3 0 0 1 0 0 0 0 0 28 7 7 4 0 0 0 0 0 0 0 0 0 0 0 18...
460
461 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:ArbitrarySize:
462 MinDistance1:MaxDistance10;696;NumericalValues;IDsAndValuesString;Ar1-
463 Ar1-Ar1 Ar1-Ar1-H1 Ar1-Ar1-HBA1 Ar1-Ar1-HBD1 Ar1-H1-H1 Ar1-H1-HBA1 Ar1
464 -H1-HBD1 Ar1-HBA1-HBD1 H1-H1-H1 H1-H1-HBA1 H1-H1-HBD1 H1-HBA1-HBA1 H1-
465 HBA1-HBD1 H1-HBA1-NI1 H1-HBD1-NI1 HBA1-HBA1-NI1 HBA1-HBD1-NI1 Ar1-...;
466 46 106 8 3 83 11 4 1 21 5 3 1 2 2 1 1 1 100 101 18 11 145 132 26 14 23
467 28 3 3 5 4 61 45 10 4 16 20 7 5 1 3 4 5 3 1 1 1 1 5 4 2 1 2 2 2 1 1 1
468 119 123 24 15 185 202 41 25 22 17 3 5 85 95 18 11 23 17 3 1 1 6 4 ...
469
470 FingerprintsVector;TopologicalPharmacophoreAtomTriplets:FixedSize:MinD
471 istance1:MaxDistance10;2692;OrderedNumericalValues;ValuesString;46 106
472 8 3 0 0 83 11 4 0 0 0 1 0 0 0 0 0 0 0 0 21 5 3 0 0 1 2 2 0 0 1 0 0 0
473 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 100 101 18 11 0 0 145 132 26
474 14 0 0 23 28 3 3 0 0 5 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 61 45 10 4 0
475 0 16 20 7 5 1 0 3 4 5 3 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 5 ...
476
477 =head1 FUNCTIONS
478
479 =over 4
480
481 =item B<GetFingerprintsFileType>
482
483 $FileType = GetFingerprintsFileType($FileName);
484
485 Returns fingerprints B<FileType> of I<FileName> determined using extension of file name.
486 Possible B<FileType> values: I<FP, SD, Text>. Supported file name extensions for various
487 file types are: B<FP> - I<fpf, fp>; B<SD> - I<sdf, sd>; B<Text> - I<csv, tsv>.
488
489 =item B<NewFingerprintsFileIO>
490
491 $FingerprintsFileIO = NewFingerprintsFileIO(%IOParameters);
492
493 Using specified I<IOParameters> property names and values hash, B<NewFingerprintsFileIO>
494 method creates a new object using appropriate fingerprints file IO class - B<FingerprintsFPFileIO,
495 FingerprintsSDFileIO, or FingerprintsTextFileIO> - and returns a reference to a newly created
496 B<FingerprintsFileIO> object.
497
498 The I<IOParameters> hash must contain I<Name> and I<Mode> as key/value pairs to create
499 a new B<FingerprintsFileIO> object.
500
501 Based on type of file - B<FP, SD or Text> - B<NewFingerprintsFileIO> uses the B<new> method
502 from the appropriate class - B<FingerprintsFPFileIO, FingerprintsSDFileIO, or FingerprintsTextFileIO> -
503 along with I<IOParameters> to create the B<FingerprintsFileIO> object.
504
505 =item B<ReadAndProcessFingerpritsData>
506
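507 ($CompoundIDsRef, $FingerprintsObjectRef) =
508 ReadAndProcessFingerpritsData($FingerprintsFileIO, [$CheckCompoundIDs]);
509
510 Processes fingerprints bit-vector and vector string data in a file using I<FingerprintsFileIO> object
511 and returns references to arrays of B<CompoundIDs> and I<FingerprintsObjects>. Entries with invalid data are ignored; when the optional I<CheckCompoundIDs> flag is true, entries repeating an already seen compound ID are ignored as well.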
512
513 The file open and close is automatically performed during processing.
514
515 =back
516
517 =head1 AUTHOR
518
519 Manish Sud <msud@san.rr.com>
520
521 =head1 SEE ALSO
522
523 FingerprintsFPFileIO.pm, FingerprintsSDFileIO.pm, FingerprintsTextFileIO.pm
524
525 =head1 COPYRIGHT
526
527 Copyright (C) 2015 Manish Sud. All rights reserved.
528
529 This file is part of MayaChemTools.
530
531 MayaChemTools is free software; you can redistribute it and/or modify it under
532 the terms of the GNU Lesser General Public License as published by the Free
533 Software Foundation; either version 3 of the License, or (at your option)
534 any later version.
535
536 =cut