comparison subsearch/subsearch.py @ 0:6493d130f018

Uploaded
author bgruening
date Tue, 26 Mar 2013 14:49:44 -0400
parents
children 12822efbd4a4
comparison
equal deleted inserted replaced
-1:000000000000 0:6493d130f018
1 #!/usr/bin/env python
2 """
3 Input: Molecules in SDF, SMILES ...
4 Output: Molecules filtered with specified substructures.
5 Copyright 2013, Bjoern Gruening and Xavier Lucas
6 """
7 import sys, os
8 import argparse
9 import openbabel
10 openbabel.obErrorLog.StopLogging()
11 import pybel
12 import multiprocessing
13 import tempfile
14 import subprocess
15 import shutil
16
def parse_command_line():
    """
    Build the command-line interface and parse ``sys.argv``.

    Required options: -i/--infile, --fastsearch-index, -o/--outfile.
    Optional: --oformat (default 'smi'), --max-candidates (default 4000)
    and -p/--processors (default: number of CPUs reported by
    multiprocessing.cpu_count()).

    Returns the populated argparse namespace.
    """
    default_workers = multiprocessing.cpu_count()

    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-i', '--infile',
        required=True,
        help='Molecule file.',
    )
    cli.add_argument(
        '--fastsearch-index',
        dest="fastsearch_index",
        required=True,
        help='Path to the openbabel fastsearch index.',
    )
    cli.add_argument(
        '-o', '--outfile',
        required=True,
        help='Path to the output file.',
    )
    cli.add_argument(
        '--oformat',
        default='smi',
        help='Output file format',
    )
    cli.add_argument(
        "--max-candidates",
        dest="max_candidates",
        type=int,
        default=4000,
        help="The maximum number of candidates.",
    )
    cli.add_argument(
        '-p', '--processors',
        type=int,
        default=default_workers,
    )

    parsed = cli.parse_args()
    return parsed
30
# Module-level accumulator: every value handed to mp_callback() ends up here.
results = []


def mp_callback(res):
    """Record one finished job by appending ``res`` to ``results``."""
    results.append(res)
34
def mp_helper( query, args ):
    """
    Run a single Open Babel fastsearch query in a child process.

    This is a wrapper around the following command:
        obabel file.fs -O outfile -o<format> -ifs -s<query> -al <max>

    query: pattern handed to obabel's -s option.
    args:  parsed command-line namespace; ``fastsearch_index``,
           ``oformat`` and ``max_candidates`` are read from it.

    Returns the tuple (result_file_path, query). The result file is a
    temporary file that is NOT removed here; the caller has to delete it.
    The tuple is returned even if obabel exits with a non-zero code, in
    which case the failure is only reported on stderr.
    """
    if args.oformat == 'names':
        # 'names' is handled as SMILES output plus the -xt write option
        # (see the Open Babel SMILES format options).
        opts = ['-osmi', '-xt']
    else:
        opts = ['-o%s' % args.oformat]

    # Only the unique path is needed: close our own handle right away so
    # no file descriptor is leaked while obabel writes to the file.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        out_path = tmp.name

    # Build the argument vector directly instead of splitting a formatted
    # string, so paths containing whitespace stay a single argument.
    cmd = ['obabel', args.fastsearch_index, '-O', out_path]
    cmd.extend(opts)
    cmd.extend(['-ifs', '-s%s' % query, '-al', str(args.max_candidates)])

    # universal_newlines=True makes communicate() return text, which is
    # what sys.stdout/sys.stderr expect on both Python 2 and Python 3.
    child = subprocess.Popen(cmd,
        stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        universal_newlines=True)

    stdout, stderr = child.communicate()
    return_code = child.returncode

    if return_code:
        sys.stdout.write(stdout)
        sys.stderr.write(stderr)
        sys.stderr.write("Return error code %i from command:\n" % return_code)
        sys.stderr.write("%s\n" % ' '.join(cmd))
    else:
        # On success obabel's stderr output is forwarded to stdout as well.
        sys.stdout.write(stdout)
        sys.stdout.write(stderr)
    return (out_path, query)
65
66
def substructure_search( args ):
    """
    Run one obabel search per line of ``args.infile`` and merge the results.

    Every non-blank line of the input file is stripped and submitted as a
    query to a pool of ``args.processors`` worker processes (mp_helper).
    Finished jobs are collected by mp_callback in the module-level
    ``results`` list, in completion order. Afterwards all result files are
    merged into ``args.outfile`` and removed:

    * oformat == 'names': each result line is written followed by a tab
      and the query that produced it;
    * any other format: result files are concatenated byte for byte.
    """
    # Read the queries before starting workers: a missing or unreadable
    # input file then fails without leaving a pool behind, and the handle
    # is closed deterministically. Blank lines carry no query and are skipped.
    with open( args.infile ) as query_handle:
        queries = [line.strip() for line in query_handle if line.strip()]

    pool = multiprocessing.Pool( args.processors )
    # NOTE(review): the callback only fires for jobs that return normally;
    # an exception raised inside a worker is not re-raised here because the
    # AsyncResult objects are never queried.
    for query in queries:
        pool.apply_async(mp_helper, args=(query, args), callback=mp_callback)
    pool.close()
    pool.join()

    names_mode = args.oformat == 'names'
    # Text mode for the line-based 'names' output, binary for raw copies.
    with open( args.outfile, 'w' if names_mode else 'wb' ) as out_handle:
        for result_file, query in results:
            try:
                if names_mode:
                    with open(result_file) as res_handle:
                        for line in res_handle:
                            out_handle.write('%s\t%s\n' % ( line.strip(), query ))
                else:
                    with open(result_file, 'rb') as res_handle:
                        shutil.copyfileobj( res_handle, out_handle )
            finally:
                # Always drop the temporary file, even if copying failed.
                os.remove( result_file )
92
93
def __main__():
    """
    Multiprocessing Open Babel Substructure Search.

    Parses the command line and hands the resulting options straight to
    substructure_search().
    """
    substructure_search( parse_command_line() )


if __name__ == "__main__":
    __main__()