Mercurial > repos > recetox > filter_compounds
annotate filter_compounds.py @ 2:8eb6e392c92b draft default tip
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
| author | recetox | 
|---|---|
| date | Tue, 22 Mar 2022 16:01:33 +0000 | 
| parents | 987357c6941c | 
| children | 
| rev | line source | 
|---|---|
| 0 
987357c6941c
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit ad6ca51c5fee52d533cfd6b8ccef5e44027ed298"
 recetox parents: diff
changeset | 1 import argparse | 
| 
987357c6941c
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit ad6ca51c5fee52d533cfd6b8ccef5e44027ed298"
 recetox parents: diff
changeset | 2 import re | 
| 
987357c6941c
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit ad6ca51c5fee52d533cfd6b8ccef5e44027ed298"
 recetox parents: diff
changeset | 3 | 
| 
987357c6941c
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit ad6ca51c5fee52d533cfd6b8ccef5e44027ed298"
 recetox parents: diff
changeset | 4 from openbabel import openbabel, pybel | 
| 
987357c6941c
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit ad6ca51c5fee52d533cfd6b8ccef5e44027ed298"
 recetox parents: diff
changeset | 5 openbabel.obErrorLog.StopLogging() | 
| 
987357c6941c
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit ad6ca51c5fee52d533cfd6b8ccef5e44027ed298"
 recetox parents: diff
changeset | 6 | 
| 
987357c6941c
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit ad6ca51c5fee52d533cfd6b8ccef5e44027ed298"
 recetox parents: diff
changeset | 7 | 
| 
987357c6941c
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit ad6ca51c5fee52d533cfd6b8ccef5e44027ed298"
 recetox parents: diff
def parse_command_line():
    """Parse the tool's command-line options from ``sys.argv``.

    Returns:
        argparse.Namespace with ``input`` and ``output`` (both required
        file names) and the boolean switches ``met`` and ``anorg``.
    """
    # (short flag, long flag, help text) for the mandatory file arguments.
    file_options = (
        ('-i', '--input', 'Input file name'),
        ('-o', '--output', 'Output file name'),
    )
    # Same layout for the optional on/off filter switches.
    filter_switches = (
        ('-m', '--met', 'Remove organometallic compounds'),
        ('-a', '--anorg', 'Remove anorganic compounds'),
    )

    cli = argparse.ArgumentParser()
    for short_flag, long_flag, description in file_options:
        cli.add_argument(short_flag, long_flag, required=True, help=description)
    for short_flag, long_flag, description in filter_switches:
        cli.add_argument(
            short_flag,
            long_flag,
            required=False,
            action='store_true',
            help=description,
        )
    return cli.parse_args()
| 
987357c6941c
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit ad6ca51c5fee52d533cfd6b8ccef5e44027ed298"
 recetox parents: diff
changeset | 15 | 
| 
987357c6941c
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit ad6ca51c5fee52d533cfd6b8ccef5e44027ed298"
 recetox parents: diff
changeset | 16 | 
| 
987357c6941c
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit ad6ca51c5fee52d533cfd6b8ccef5e44027ed298"
 recetox parents: diff
def _molecular_formula(smiles):
    """Return the formula layer of the InChI generated for *smiles*.

    A blank SMILES yields ``''`` without calling Open Babel, so empty cells
    and blank lines are handled the same way in both input layouts.
    """
    if not smiles.strip():
        return ''
    # The code relies on the InChI string being '/'-separated with the
    # molecular formula as the second field ("InChI=1S/<formula>/...").
    return pybel.readstring('smi', smiles).write('inchi').split('/')[1]


def filter_compounds(args, pattern):
    """Copy the compounds whose formula matches every regex in *pattern*.

    The input is read line by line and may be either a plain list of SMILES
    or a tab-separated table whose first column is a numeric index and whose
    remainder is the SMILES. The layout is detected per line.

    * Plain list: lines that fail a pattern are dropped.
    * Indexed table: rows that fail a pattern are kept, but their SMILES is
      blanked (``"<index>\\t\\n"``) so the row numbering is preserved.

    Args:
        args: object with ``input`` and ``output`` attributes (file names).
        pattern: iterable of regular expressions; a line passes only when
            all of them are found in the compound's molecular formula. An
            empty iterable lets every line through unchanged.
    """
    print(pattern)
    # Compile once up front instead of re-resolving each regex per line.
    compiled = [re.compile(expr) for expr in pattern]

    with open(args.input, "r") as infile, open(args.output, "w") as outfile:
        for line in infile:
            values = line.split('\t', 1)

            # check if input is list of SMILES or indexed table of SMILES
            indexed = values[0].isnumeric()
            if indexed:
                # A row holding only an index (no tab at all) has no SMILES
                # column; treat it as an empty SMILES instead of raising
                # IndexError.
                smiles = values[1] if len(values) > 1 else ''
            else:
                smiles = values[0]

            formula = _molecular_formula(smiles)

            # write original line only if every selected filter passes
            if all(regex.search(formula) for regex in compiled):
                outfile.write(line)
            elif indexed:
                # keep the index so the table stays aligned with its source
                outfile.write(f'{values[0]}\t\n')
| 
987357c6941c
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit ad6ca51c5fee52d533cfd6b8ccef5e44027ed298"
 recetox parents: diff
changeset | 37 | 
| 
987357c6941c
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit ad6ca51c5fee52d533cfd6b8ccef5e44027ed298"
 recetox parents: diff
changeset | 38 | 
| 
987357c6941c
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit ad6ca51c5fee52d533cfd6b8ccef5e44027ed298"
 recetox parents: diff
def __main__():
    """
    Filter organometallics and/or anorganic compounds.

    Parses the command line, collects one regular expression per selected
    filter and hands them to ``filter_compounds``, which keeps a compound
    only when every collected pattern matches its molecular formula.
    """
    args = parse_command_line()

    # One regex per requested filter. The list must stay a list:
    # filter_compounds iterates over it, so rebinding it to a single pattern
    # string would make it iterate character by character and fail on '['.
    sel_pattern = []
    if not args.met and not args.anorg:
        # No filter selected: leave the list empty so every line passes and
        # the output file mirrors the input file.
        print("No filtering selected - user did not specify what to filter out.")
    # select patterns for filtering
    if args.met:
        # formula built only from the listed element symbols, digits and dots
        sel_pattern.append(r'^(?:C|N|O|P|F|S|I|B|Si|Se|Cl|Br|Li|Na|H|K|[0-9]|\.)+$')
    if args.anorg:
        # a 'C' not followed by a, b or d-z, so two-letter symbols such as
        # Ca or Cl do not count as carbon
        sel_pattern.append(r'[C][^abd-z]')

    filter_compounds(args, sel_pattern)


if __name__ == "__main__":
    __main__()
