Mercurial > repos > bgruening > openbabel_addh
comparison subsearch.py @ 0:c6f3cedfe301 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
| author | bgruening |
|---|---|
| date | Sat, 20 May 2017 08:33:25 -0400 |
| parents | |
| children | 9a08f0d1b305 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c6f3cedfe301 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 Input: Molecules in SDF, SMILES ... | |
| 4 Output: Molecules filtered with specified substructures. | |
| 5 Copyright 2013, Bjoern Gruening and Xavier Lucas | |
| 6 """ | |
| 7 import sys, os | |
| 8 import argparse | |
| 9 import openbabel | |
| 10 openbabel.obErrorLog.StopLogging() | |
| 11 import pybel | |
| 12 import multiprocessing | |
| 13 import tempfile | |
| 14 import subprocess | |
| 15 import shutil | |
| 16 | |
def parse_command_line():
    """
    Declare the command line interface and parse the arguments of this run.

    Returns the argparse namespace with the attributes ``infile``,
    ``iformat``, ``fastsearch_index``, ``outfile``, ``oformat``,
    ``max_candidates`` and ``processors``.
    """
    cli = argparse.ArgumentParser()

    # Query molecules.
    cli.add_argument('-i', '--infile', required=True, help='Molecule file.')
    cli.add_argument('--iformat', help='Input format.')

    # Index that is searched.
    cli.add_argument(
        '--fastsearch-index',
        dest="fastsearch_index",
        required=True,
        help='Path to the openbabel fastsearch index.',
    )

    # Result file.
    cli.add_argument('-o', '--outfile', required=True, help='Path to the output file.')
    cli.add_argument('--oformat', default='smi', help='Output file format')

    # Search limits and parallelism.
    cli.add_argument(
        "--max-candidates",
        dest="max_candidates",
        type=int,
        default=4000,
        help="The maximum number of candidates.",
    )
    # Defaults to one worker per CPU reported by multiprocessing.
    default_processors = multiprocessing.cpu_count()
    cli.add_argument('-p', '--processors', type=int, default=default_processors)

    options = cli.parse_args()
    return options
| 31 | |
# Return values of the finished workers, filled by mp_callback().
results = []


def mp_callback(res):
    """
    Callback for the worker pool: store one worker return value in the
    module level ``results`` list.
    """
    results.append(res)
| 35 | |
def _as_text( data ):
    """Return captured process output as text; bytes are decoded leniently."""
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'replace')


def mp_helper( query, args ):
    """
    Helper function for multiprocessing.

    Runs a single fastsearch query. It is a wrapper around the following command:
        obabel -ifs file.fs -O outfile.smi -osmi -s"smarts" -al 4000

    query: SMILES or SMARTS pattern to search for.
    args: parsed command line options; ``oformat``, ``fastsearch_index``
        and ``max_candidates`` are read.

    Returns the tuple (path of the temporary result file, query). The
    result file is not removed here, that is left to the caller.
    """

    if args.oformat == 'names':
        # 'names' is handled with the SMILES writer plus its -xt option
        # (title only, according to the Open Babel SMILES format docs).
        opts = ['-osmi', '-xt']
    else:
        opts = ['-o%s' % args.oformat]

    # Only the file name is needed. Close our own handle right away,
    # otherwise every query leaves one open descriptor in the worker.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        result_path = tmp.name

    # Build the argument vector as a list. Splitting a formatted string on
    # whitespace tears apart index paths or patterns that contain blanks.
    cmd = ['obabel', '-ifs', args.fastsearch_index, '-O', result_path]
    cmd.extend(opts)
    cmd.extend(['-s%s' % query, '-al', str(args.max_candidates)])

    child = subprocess.Popen(cmd,
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # communicate() hands back bytes on Python 3, which the text streams
    # sys.stdout / sys.stderr refuse to write.
    stdout, stderr = [_as_text(stream) for stream in child.communicate()]
    return_code = child.returncode

    sys.stdout.write(stdout)
    if return_code:
        sys.stderr.write(stderr)
        sys.stderr.write("Return error code %i from command:\n" % return_code)
        sys.stderr.write("%s\n" % ' '.join(cmd))
    else:
        # On success the stderr text of obabel is forwarded to stdout.
        # NOTE(review): presumably so that informational messages do not
        # make the job look failed - confirm.
        sys.stdout.write(stderr)
    return (result_path, query)
| 66 | |
| 67 | |
def get_smiles_or_smarts( args ):
    """
    Wrapper to retrieve a stripped SMILES or SMARTS string from different input formats.

    args: parsed command line options; ``iformat`` and ``infile`` are read.

    Generator. For the formats 'smi', 'text' and 'tabular' the first
    tab-separated column of every line is yielded. Every other format is
    read with pybel and each molecule is yielded as SMILES without its
    title. Empty patterns (blank lines, empty first column) are skipped,
    because they can not be used as a search query.
    """
    if args.iformat in ['smi', 'text', 'tabular']:
        with open( args.infile ) as text_file:
            for line in text_file:
                pattern = line.split('\t')[0].strip()
                if pattern:
                    yield pattern
    else:
        # inchi or sdf files
        for mol in pybel.readfile( args.iformat, args.infile ):
            # only the part before the first tab is kept as pattern
            pattern = mol.write('smiles').split('\t')[0].strip()
            if pattern:
                yield pattern
| 80 | |
def substructure_search( args ):
    """
    Run one fastsearch per query molecule in a worker pool and merge all
    hits into ``args.outfile``.

    args: parsed command line options; ``processors``, ``oformat`` and
        ``outfile`` are read here, the remaining options are used by the
        helpers that are called.

    Every worker result is a (temporary result file, query) tuple that is
    collected in the module level ``results`` list by mp_callback(). The
    temporary files are removed once they have been merged.
    For the output format 'names' each output line is '<hit>\\t<query>',
    for every other format the result files are concatenated unchanged.
    """

    pool = multiprocessing.Pool( args.processors )
    for query in get_smiles_or_smarts( args ):
        pool.apply_async(mp_helper, args=(query, args), callback=mp_callback)
    # No more jobs are submitted, wait until all queries are finished.
    pool.close()
    pool.join()

    # The context managers close the handles even if merging fails, and
    # the temporary result file is removed in any case.
    if args.oformat == 'names':
        with open( args.outfile, 'w' ) as out_handle:
            for result_file, query in results:
                try:
                    with open(result_file) as res_handle:
                        for line in res_handle:
                            out_handle.write('%s\t%s\n' % ( line.strip(), query ))
                finally:
                    os.remove( result_file )
    else:
        # Copied in binary mode so the results are passed through unchanged.
        with open( args.outfile, 'wb' ) as out_handle:
            for result_file, query in results:
                try:
                    with open(result_file, 'rb') as res_handle:
                        shutil.copyfileobj( res_handle, out_handle )
                finally:
                    os.remove( result_file )
| 106 | |
| 107 | |
def __main__():
    """
    Entry point of the multiprocessing Open Babel substructure search:
    parse the command line, then run the search with these options.
    """
    substructure_search( parse_command_line() )


if __name__ == "__main__":
    __main__()
