Mercurial > repos > recetox > biotransformer
comparison wrapper_biotransformer.py @ 1:362a66a3889c draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
| author | recetox |
|---|---|
| date | Tue, 22 Sep 2020 14:42:15 +0000 |
| parents | |
| children | 6080aee7c4f6 |
comparison
equal
deleted
inserted
replaced
| 0:b5b62d04625a | 1:362a66a3889c |
|---|---|
| 1 import subprocess | |
| 2 import sys | |
| 3 import tempfile | |
| 4 | |
| 5 import pandas | |
| 6 from openbabel import pybel | |
| 7 | |
| 8 | |
def InchiToSmiles(df):
    """Translate every InChI string in ``df['InChI']`` to SMILES.

    Each entry is parsed with ``pybel.readstring("inchi", ...)`` and
    serialised with ``write("smi")``; the serialised text is returned
    exactly as pybel produces it.

    :param df: table-like object whose ``'InChI'`` entry iterates over
        InChI strings.
    :return: list with one SMILES string per InChI entry, in input order.
    """
    # NOTE(review): pybel's "smi" writer may append a trailing tab/newline
    # to the SMILES text - confirm before relying on exact string equality.
    return [
        pybel.readstring("inchi", inchi).write("smi")
        for inchi in df['InChI']
    ]
| 16 | |
| 17 | |
# Command-line wrapper around the ``biotransformer`` executable.
#
# Two modes are supported:
#   * batch mode (``-icsv FILE`` present): FILE is a header-less CSV with one
#     SMILES string per row. biotransformer is run once per row and three CSV
#     files are written (``-ocsv``: all results, ``-ocsvDup``: de-duplicated on
#     six columns, ``-ocsvDup2``: de-duplicated on three columns).
#   * single mode (no ``-icsv``): the arguments are handed to biotransformer
#     unchanged and the CSV it wrote to ``-ocsv`` is annotated in place.
# In both modes a "SMILES query" and a "SMILES target" column are prepended.
executable = ["biotransformer"]

# Column subsets used to build the two de-duplicated result tables.
_DEDUP_SIX_COLUMNS = ["InChI", "InChIKey", "Synonyms", "Molecular formula", "Major Isotope Mass", "ALogP"]
_DEDUP_THREE_COLUMNS = ["Molecular formula", "Major Isotope Mass", "ALogP"]


def _pop_option(args, flag):
    """Remove ``flag`` and the value following it from ``args``; return the value.

    Writes a message to stderr and exits with status 1 when the flag or its
    value is missing, instead of failing with a traceback.
    """
    if flag not in args:
        sys.stderr.write("expected " + flag + " parameter\n")
        sys.exit(1)
    position = args.index(flag)
    if position + 1 >= len(args):
        sys.stderr.write("expected a value after " + flag + "\n")
        sys.exit(1)
    value = args.pop(position + 1)
    args.pop(position)
    return value


def _write_results(frames, path):
    """Merge per-query frames, drop duplicate rows, add target SMILES, write CSV."""
    if frames:
        merged = pandas.concat(frames)
    else:
        # No query produced any output: still emit a (header-only) file.
        merged = pandas.DataFrame(columns=["SMILES query"])
    merged.drop_duplicates(inplace=True)
    # An empty table may lack the "InChI" column, so skip the translation.
    targets = [] if merged.empty else InchiToSmiles(merged)
    merged.insert(1, "SMILES target", targets)
    merged.to_csv(path)


argv = sys.argv[1:]
if "-icsv" in argv:
    icsv = _pop_option(argv, "-icsv")
    ocsv = _pop_option(argv, "-ocsv")
    ocsv_dup = _pop_option(argv, "-ocsvDup")
    ocsv_dup2 = _pop_option(argv, "-ocsvDup2")

    in_df = pandas.read_csv(icsv, header=None)

    all_frames = []  # every result row
    six_frames = []  # rows unique per query on the six-column subset
    three_frames = []  # rows unique per query on the three-column subset
    for _, (smiles,) in in_df.iterrows():
        with tempfile.NamedTemporaryFile() as out:
            # The exit status is deliberately not checked; a query without
            # usable output is detected through the empty result file below.
            subprocess.run(executable + argv + ["-ismi", smiles] + ["-ocsv", out.name])
            try:
                # Parse once, while the temporary file still exists.
                result = pandas.read_csv(out.name)
            except pandas.errors.EmptyDataError:
                # Nothing was written for this query; keep processing the rest.
                continue
        result.insert(0, "SMILES query", smiles)
        all_frames.append(result)
        six_frames.append(result.drop_duplicates(subset=_DEDUP_SIX_COLUMNS))
        three_frames.append(result.drop_duplicates(subset=_DEDUP_THREE_COLUMNS))

    _write_results(all_frames, ocsv)
    _write_results(six_frames, ocsv_dup)
    _write_results(three_frames, ocsv_dup2)
else:
    subprocess.run(executable + argv)
    smile = argv[argv.index("-ismi") + 1]
    out = argv[argv.index("-ocsv") + 1]
    tmp = pandas.read_csv(out)  # reads the output file biotransformer created
    tmp.insert(0, "SMILES query", smile)  # add SMILES string for query
    tmp.insert(1, "SMILES target", InchiToSmiles(tmp))  # add SMILES string for target
    tmp.to_csv(out)
