Mercurial > repos > bgruening > openbabel_addh
comparison ob_filter.py @ 0:c6f3cedfe301 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 01da22e4184a5a6f6a3dd4631a7b9c31d1b6d502
| author | bgruening |
|---|---|
| date | Sat, 20 May 2017 08:33:25 -0400 |
| parents | |
| children | 1b1e6531edc2 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c6f3cedfe301 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 Input: set of molecules with pre-calculated physico-chemical properties | |
| 4 Output: set of molecules that pass all the filters | |
| 5 Copyright 2012, Bjoern Gruening and Xavier Lucas | |
| 6 | |
| 7 TODO: AND/OR conditions? | |
| 8 """ | |
| 9 import sys, os | |
| 10 import argparse | |
| 11 import cheminfolib | |
| 12 import json | |
| 13 import pybel | |
| 14 import shlex, subprocess | |
| 15 | |
| 16 cheminfolib.pybel_stop_logging() | |
| 17 | |
| 18 def parse_command_line(): | |
| 19 parser = argparse.ArgumentParser() | |
| 20 parser.add_argument('-i', '--input', help='Input file name') | |
| 21 parser.add_argument('-iformat', help='Input file format') | |
| 22 parser.add_argument('-oformat', | |
| 23 default='smi', | |
| 24 help='Output file format') | |
| 25 parser.add_argument('-o', '--output', | |
| 26 help='Output file name', | |
| 27 required=True) | |
| 28 parser.add_argument('--filters', | |
| 29 help="Specify the filters to apply", | |
| 30 required=True, | |
| 31 ) | |
| 32 return parser.parse_args() | |
| 33 | |
| 34 def filter_precalculated_compounds(args, filters): | |
| 35 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) | |
| 36 for mol in pybel.readfile('sdf', args.input): | |
| 37 for key, elem in filters.items(): | |
| 38 # map the short description to the larger metadata names stored in the sdf file | |
| 39 property = cheminfolib.ColumnNames[key] | |
| 40 min = elem[0] | |
| 41 max = elem[1] | |
| 42 if float(mol.data[property]) >= float(min) and float(mol.data[property]) <= float(max): | |
| 43 pass | |
| 44 else: | |
| 45 # leave the filter loop, because one filter constrained are not satisfied | |
| 46 break | |
| 47 else: | |
| 48 # if the filter loop terminates in a normal way (no break) all filter rules are satisfied, so save the compound | |
| 49 outfile.write(mol) | |
| 50 outfile.close() | |
| 51 | |
| 52 def filter_new_compounds(args, filters): | |
| 53 | |
| 54 if args.iformat == args.oformat: | |
| 55 # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out | |
| 56 # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text | |
| 57 cmd = 'obabel -i%s %s -ocopy -O %s --filter' % (args.iformat, args.input, args.output) | |
| 58 else: | |
| 59 cmd = 'obabel -i%s %s -o%s -O %s --filter' % (args.iformat, args.input, args.oformat, args.output) | |
| 60 filter_cmd = '' | |
| 61 # OBDescriptor stores a mapping from our desc shortcut to the OB name [0] and a long description [1] | |
| 62 for key, elem in filters.items(): | |
| 63 ob_descriptor_name = cheminfolib.OBDescriptor[key][0] | |
| 64 min = elem[0] | |
| 65 max = elem[1] | |
| 66 filter_cmd += ' %s>=%s %s<=%s ' % (ob_descriptor_name, min, ob_descriptor_name, max) | |
| 67 | |
| 68 args = shlex.split('%s "%s"' % (cmd, filter_cmd)) | |
| 69 #print '%s "%s"' % (cmd, filter_cmd) | |
| 70 # calling openbabel with subprocess and pipe potential errors occuring in openbabel to stdout | |
| 71 child = subprocess.Popen(args, | |
| 72 stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
| 73 | |
| 74 stdout, stderr = child.communicate() | |
| 75 return_code = child.returncode | |
| 76 | |
| 77 if return_code: | |
| 78 sys.stdout.write(stdout.decode('utf-8')) | |
| 79 sys.stderr.write(stderr.decode('utf-8')) | |
| 80 sys.stderr.write("Return error code %i from command:\n" % return_code) | |
| 81 sys.stderr.write("%s\n" % cmd) | |
| 82 else: | |
| 83 sys.stdout.write(stdout.decode('utf-8')) | |
| 84 sys.stdout.write(stderr.decode('utf-8')) | |
| 85 | |
| 86 | |
| 87 def __main__(): | |
| 88 """ | |
| 89 Select compounds with certain properties from a small library | |
| 90 """ | |
| 91 args = parse_command_line() | |
| 92 # Its a small trick to get the parameters in an easy way from the xml file. | |
| 93 # To keep it readable in the xml file, many white-spaces are included in that string it needs to be removed. | |
| 94 # Also the last loop creates a ',{' that is not an valid jason expression. | |
| 95 filters = json.loads((args.filters).replace(' ', '').replace(',}', '}')) | |
| 96 if args.iformat == 'sdf': | |
| 97 # Check if the sdf file contains all of the required metadata to invoke the precalculation filtering | |
| 98 mol = pybel.readfile('sdf', args.input).next() | |
| 99 for key, elem in filters.items(): | |
| 100 property = cheminfolib.ColumnNames[key] | |
| 101 if not property in mol.data: | |
| 102 break | |
| 103 else: | |
| 104 # if the for loop finishes in a normal way, we should habe all properties at least in the first molecule | |
| 105 # assume it is the same for all other molecules and start the precalculated filtering | |
| 106 filter_precalculated_compounds(args, filters) | |
| 107 return True | |
| 108 filter_new_compounds(args, filters) | |
| 109 | |
| 110 | |
| 111 if __name__ == "__main__" : | |
| 112 __main__() |
