Mercurial > repos > bgruening > openbabel_addh
comparison ob_filter.py @ 13:bfaad2f84dec draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 1fe240ef0064a1a4a66d9be1ccace53824280b75"
| author | bgruening |
|---|---|
| date | Mon, 19 Oct 2020 14:32:05 +0000 |
| parents | 9a08f0d1b305 |
| children | 640b694bb1f9 |
comparison
equal
deleted
inserted
replaced
| 12:9a08f0d1b305 | 13:bfaad2f84dec |
|---|---|
| 4 Output: set of molecules that pass all the filters | 4 Output: set of molecules that pass all the filters |
| 5 Copyright 2012, Bjoern Gruening and Xavier Lucas | 5 Copyright 2012, Bjoern Gruening and Xavier Lucas |
| 6 | 6 |
| 7 TODO: AND/OR conditions? | 7 TODO: AND/OR conditions? |
| 8 """ | 8 """ |
| 9 import sys, os | |
| 10 import argparse | 9 import argparse |
| 10 import json | |
| 11 import shlex | |
| 12 import subprocess | |
| 13 import sys | |
| 14 | |
| 11 import cheminfolib | 15 import cheminfolib |
| 12 import json | |
| 13 import shlex, subprocess | |
| 14 | |
| 15 from openbabel import pybel | 16 from openbabel import pybel |
| 16 cheminfolib.pybel_stop_logging() | 17 cheminfolib.pybel_stop_logging() |
| 18 | |
| 17 | 19 |
| 18 def parse_command_line(): | 20 def parse_command_line(): |
| 19 parser = argparse.ArgumentParser() | 21 parser = argparse.ArgumentParser() |
| 20 parser.add_argument('-i', '--input', help='Input file name') | 22 parser.add_argument('-i', '--input', help='Input file name') |
| 21 parser.add_argument('-iformat', help='Input file format') | 23 parser.add_argument('-iformat', help='Input file format') |
| 22 parser.add_argument('-oformat', | 24 parser.add_argument('-oformat', default='smi', |
| 23 default='smi', | 25 help='Output file format') |
| 24 help='Output file format') | 26 parser.add_argument('-o', '--output', help='Output file name', |
| 25 parser.add_argument('-o', '--output', | 27 required=True) |
| 26 help='Output file name', | 28 parser.add_argument('--filters', help="Specify the filters to apply", |
| 27 required=True) | 29 required=True) |
| 28 parser.add_argument('--filters', | 30 parser.add_argument('--list_of_names', required=False, |
| 29 help="Specify the filters to apply", | 31 help="A file with list of molecule names to extract. Every name is in one line.") |
| 30 required=True, | |
| 31 ) | |
| 32 parser.add_argument('--list_of_names', | |
| 33 help="A file with list of molecule names to extract. Every name is in one line.", | |
| 34 required=False, | |
| 35 ) | |
| 36 return parser.parse_args() | 32 return parser.parse_args() |
| 33 | |
| 37 | 34 |
| 38 def filter_precalculated_compounds(args, filters): | 35 def filter_precalculated_compounds(args, filters): |
| 39 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) | 36 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) |
| 40 for mol in pybel.readfile('sdf', args.input): | 37 for mol in pybel.readfile('sdf', args.input): |
| 41 for key, elem in filters.items(): | 38 for key, elem in filters.items(): |
| 51 else: | 48 else: |
| 52 # if the filter loop terminates in a normal way (no break) all filter rules are satisfied, so save the compound | 49 # if the filter loop terminates in a normal way (no break) all filter rules are satisfied, so save the compound |
| 53 outfile.write(mol) | 50 outfile.write(mol) |
| 54 outfile.close() | 51 outfile.close() |
| 55 | 52 |
| 53 | |
| 56 def filter_new_compounds(args, filters): | 54 def filter_new_compounds(args, filters): |
| 57 | 55 |
| 58 if args.iformat == args.oformat: | 56 if args.iformat == args.oformat: |
| 59 # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out | 57 # use the -ocopy option from openbabel to speed up the filtering, additionally no conversion is carried out |
| 60 # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text | 58 # http://openbabel.org/docs/dev/FileFormats/Copy_raw_text.html#copy-raw-text |
| 68 min = elem[0] | 66 min = elem[0] |
| 69 max = elem[1] | 67 max = elem[1] |
| 70 filter_cmd += ' %s>=%s %s<=%s ' % (ob_descriptor_name, min, ob_descriptor_name, max) | 68 filter_cmd += ' %s>=%s %s<=%s ' % (ob_descriptor_name, min, ob_descriptor_name, max) |
| 71 | 69 |
| 72 args = shlex.split('%s "%s"' % (cmd, filter_cmd)) | 70 args = shlex.split('%s "%s"' % (cmd, filter_cmd)) |
| 73 #print '%s "%s"' % (cmd, filter_cmd) | 71 # print '%s "%s"' % (cmd, filter_cmd) |
| 74 # calling openbabel with subprocess and pipe potential errors occurring in openbabel to stdout | 72 # calling openbabel with subprocess and pipe potential errors occurring in openbabel to stdout |
| 75 child = subprocess.Popen(args, | 73 child = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
| 76 stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
| 77 | 74 |
| 78 stdout, stderr = child.communicate() | 75 stdout, stderr = child.communicate() |
| 79 return_code = child.returncode | 76 return_code = child.returncode |
| 80 | 77 |
| 81 if return_code: | 78 if return_code: |
| 85 sys.stderr.write("%s\n" % cmd) | 82 sys.stderr.write("%s\n" % cmd) |
| 86 else: | 83 else: |
| 87 sys.stdout.write(stdout.decode('utf-8')) | 84 sys.stdout.write(stdout.decode('utf-8')) |
| 88 sys.stdout.write(stderr.decode('utf-8')) | 85 sys.stdout.write(stderr.decode('utf-8')) |
| 89 | 86 |
| 87 | |
| 90 def filter_by_name(args): | 88 def filter_by_name(args): |
| 91 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) | 89 outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True) |
| 92 for mol in pybel.readfile('sdf', args.input): | 90 for mol in pybel.readfile('sdf', args.input): |
| 93 for name in open(args.list_of_names): | 91 for name in open(args.list_of_names): |
| 94 if mol.title.strip() == name.strip(): | 92 if mol.title.strip() == name.strip(): |
| 95 outfile.write(mol) | 93 outfile.write(mol) |
| 96 outfile.close() | 94 outfile.close() |
| 97 | 95 |
| 96 | |
| 98 def __main__(): | 97 def __main__(): |
| 99 """ | 98 """ |
| 100 Select compounds with certain properties from a small library | 99 Select compounds with certain properties from a small library |
| 101 """ | 100 """ |
| 102 args = parse_command_line() | 101 args = parse_command_line() |
| 103 | 102 |
| 104 if args.filters == '__filter_by_name__': | 103 if args.filters == '__filter_by_name__': |
| 105 filter_by_name(args) | 104 filter_by_name(args) |
| 106 return | 105 return |
| 107 | 106 |
| 108 # It's a small trick to get the parameters in an easy way from the xml file. | 107 # It's a small trick to get the parameters in an easy way from the xml file. |
| 109 # To keep it readable in the xml file, many white-spaces are included in that string; they need to be removed. | 108 # To keep it readable in the xml file, many white-spaces are included in that string; they need to be removed. |
| 110 # Also the last loop creates a ',}' that is not a valid JSON expression. | 109 # Also the last loop creates a ',}' that is not a valid JSON expression. |
| 111 filters = json.loads((args.filters).replace(' ', '').replace(',}', '}')) | 110 filters = json.loads((args.filters).replace(' ', '').replace(',}', '}')) |
| 112 if args.iformat == 'sdf': | 111 if args.iformat == 'sdf': |
| 113 # Check if the sdf file contains all of the required metadata to invoke the precalculation filtering | 112 # Check if the sdf file contains all of the required metadata to invoke the precalculation filtering |
| 114 mol = next(pybel.readfile('sdf', args.input)) | 113 mol = next(pybel.readfile('sdf', args.input)) |
| 115 for key, elem in filters.items(): | 114 for key, elem in filters.items(): |
| 116 property = cheminfolib.ColumnNames.get(key, key) | 115 property = cheminfolib.ColumnNames.get(key, key) |
| 117 if not property in mol.data: | 116 if property not in mol.data: |
| 118 break | 117 break |
| 119 else: | 118 else: |
| 120 # if the for loop finishes in a normal way, we should have all properties at least in the first molecule | 119 # if the for loop finishes in a normal way, we should have all properties at least in the first molecule |
| 121 # assume it is the same for all other molecules and start the precalculated filtering | 120 # assume it is the same for all other molecules and start the precalculated filtering |
| 122 filter_precalculated_compounds(args, filters) | 121 filter_precalculated_compounds(args, filters) |
| 123 return True | 122 return True |
| 124 filter_new_compounds(args, filters) | 123 filter_new_compounds(args, filters) |
| 125 | 124 |
| 126 | 125 |
| 127 if __name__ == "__main__" : | 126 if __name__ == "__main__": |
| 128 __main__() | 127 __main__() |
