Mercurial > repos > recetox > biotransformer
changeset 3:6080aee7c4f6 draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 5cdd2628a1a509b3e0ccc599eaab63d664bf031a"
author | recetox |
---|---|
date | Wed, 13 Jan 2021 11:17:53 +0000 |
parents | 3998017c374b |
children | 77f693bb14ac |
files | biotransformer.xml test-data/output1.csv test-data/output2.csv test-data/output3.csv wrapper_biotransformer.py |
diffstat | 5 files changed, 62 insertions(+), 42 deletions(-) [+] |
line wrap: on
line diff
--- a/biotransformer.xml Fri Sep 25 14:32:24 2020 +0000 +++ b/biotransformer.xml Wed Jan 13 11:17:53 2021 +0000 @@ -1,4 +1,4 @@ -<tool id="biotransformer" name="BioTransformer" version="1.1.5+galaxy0"> +<tool id="biotransformer" name="BioTransformer" version="1.1.5+galaxy1"> <requirements> <requirement type="package" version="1.1.5">biotransformer</requirement> <requirement type="package" version="3.1.1">openbabel</requirement> @@ -73,17 +73,17 @@ Parameters explanation: -Input. Currently, only a CSV file with one SMILES per line is accepted. +**Input.** Currently, only a CSV file with one SMILES per line is accepted. -The type of description: Type of biotransformer - EC-based (ecbased), CYP450 (cyp450), Phase II (phaseII), Human gut +**The type of prediction:** EC-based (ecbased), CYP450 (cyp450), Phase II (phaseII), Human gut microbial (hgut), human super transformer* (superbio, or allHuman), Environmental microbial (envimicro). -The number of steps for the prediction. This option will be used for the EC-based, CYP450, Phase II, and Environmental +**The number of steps for the prediction:** this option will be used for the EC-based, CYP450, Phase II, and Environmental microbial biotransformers. The default value is 1. -Mass tolerance for metabolite identification (default is 0.01). +**Mass tolerance for metabolite identification** (default is 0.01). -Output of BioTransformer with CSV as an input are 3 CSV files. One without any filtering, second with filtered +**Output of BioTransformer** with CSV as an input are 3 CSV files. One without any filtering, second with filtered duplicates based on 6 columns (InChI, InChIKey, Synonyms, Molecular formula, Major Isotope Mass, AlogP) and third with filtered duplicates based on 3 columns (Molecular formula, Major Isotope Mass, AlogP).
--- a/test-data/output1.csv Fri Sep 25 14:32:24 2020 +0000 +++ b/test-data/output1.csv Wed Jan 13 11:17:53 2021 +0000 @@ -13,8 +13,8 @@ CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 2,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1cc(c(C)cc1O)O ","InChI=1S/C10H14O2/c1-6(2)8-5-9(11)7(3)4-10(8)12/h4-6,11-12H,1-3H3",OQIOHYHRGZNZCW-UHFFFAOYSA-N,CC(C)C1=CC(=C(C)C=C1O)O,"Thymohydroquinone +Thymoquinol Hydrothymoquinone -Thymoquinol 2217-60-9 p-Cymene-2,5-diol 1,4-Benzenediol, 2-methyl-5-(1-methylethyl)- @@ -24,10 +24,11 @@ UNII-1C2ICM1R8V BRN 2084452 1C2ICM1R8V +2-methyl-5-propan-2-ylbenzene-1,4-diol +2-methyl-5-(1-methylethyl)-1,4-benzenediol 2-ISOPROPYL-5-METHYLBENZENE-1,4-DIOL Thymohydrochinon Thymohydroquinone (I) -2-methyl-5-propan-2-ylbenzene-1,4-diol SCHEMBL69082 p-Cymene-2,5-diol (8CI) CHEMBL4204349 @@ -72,7 +73,8 @@ CYP2E1 CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 4,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1c(cc(C)cc1O)O -","InChI=1S/C10H14O2/c1-6(2)10-8(11)4-7(3)5-9(10)12/h4-6,11-12H,1-3H3",TUWRZVAMHVWRER-UHFFFAOYSA-N,CC(C)C1=C(C=C(C)C=C1O)O,SCHEMBL1494319,12310887.0,C10H14O2,166.099379688,2.198500000000001,0,0,0,BTM00004,BTM00004,Hydroxylation of benzene on carbon ortho to electron donating group,BTMR1045,"CYP1A2 +","InChI=1S/C10H14O2/c1-6(2)10-8(11)4-7(3)5-9(10)12/h4-6,11-12H,1-3H3",TUWRZVAMHVWRER-UHFFFAOYSA-N,CC(C)C1=C(C=C(C)C=C1O)O,"SCHEMBL1494319 +5-methyl-2-propan-2-ylbenzene-1,3-diol",12310887.0,C10H14O2,166.099379688,2.198500000000001,0,0,0,BTM00004,BTM00004,Hydroxylation of benzene on carbon ortho to electron donating group,BTMR1045,"CYP1A2 CYP2C8 CYP2C9 CYP2C19 @@ -80,19 +82,19 @@ CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 5,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(C)c(c1O)O ","InChI=1S/C10H14O2/c1-6(2)8-5-4-7(3)9(11)10(8)12/h4-6,11-12H,1-3H3",LYUBXLHGANLIMX-UHFFFAOYSA-N,CC(C)C1=CC=C(C)C(=C1O)O,"Cymopyrocatechol -3-Isopropyl-6-methylpyrocatechol +p-cymene-2,3-diol 490-06-2 -p-cymene-2,3-diol +3-Isopropyl-6-methylpyrocatechol NSC 40567 Pyrocatechol, 2-isopropyl-6-methyl- BRN 2248022 UNII-93XFQ715UL +3-methyl-6-propan-2-ylbenzene-1,2-diol 93XFQ715UL p-Cymene-2,3-diol (7CI,8CI) -3-isopropyl-6-methylcatechol +3-isopropyl-6-methylbenzene-1,2-diol 3-Isopropyl-6-Methyl-Benzene-1,2-Diol NSC40567 -3-methyl-6-propan-2-ylbenzene-1,2-diol SCHEMBL1494556 1,2-Benzenediol, 3-methyl-6-(1-methylethyl)- (9CI) DTXSID10197652 @@ -114,6 +116,7 @@ 6,CC(C)C1=CC=C(C)C=C1O,"Cc1ccc(C(C)CO)c(c1)O ","InChI=1S/C10H14O2/c1-7-3-4-9(8(2)6-11)10(12)5-7/h3-5,8,11-12H,6H2,1-2H3",CLJPRXFHCRIUKW-UHFFFAOYSA-N,C(C(C)C1=CC=C(C)C=C1O)O,"9-Hydroxythymol 61955-76-8 +p-cymene-3,8-diol p-Mentha-1,3,5-triene-3,9-diol",14432748.0,C10H14O2,166.099379688,1.5777000000000003,0,0,0,BTM00006,BTM00006,Hydroxylation of terminal methyl,BTMR1061,"CYP1A2 CYP2A6 CYP2B6 @@ -125,10 +128,11 @@ CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 7,CC(C)C1=CC=C(C)C=C1O,"C=C(C)c1ccc(C)cc1O ","InChI=1S/C10H12O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-6,11H,1H2,2-3H3",IHWFPRKZRRGTTI-UHFFFAOYSA-N,CC(=C)C1=CC=C(C)C=C1O,"8,9-Dehydrothymol +5-methyl-2-prop-1-en-2-ylphenol 2-isopropenyl-5-methylphenol 18612-99-2 Phenol, 5-methyl-2-(1-methylethenyl)- -5-methyl-2-prop-1-en-2-ylphenol +p-Cymen-8-en-3-ol m-Cresol, 6-isopropenyl- SCHEMBL686122 2-Isopropenyl-5-methyl-phenol @@ -142,19 +146,19 @@ CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 8,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(C)c(c1O)O ","InChI=1S/C10H14O2/c1-6(2)8-5-4-7(3)9(11)10(8)12/h4-6,11-12H,1-3H3",LYUBXLHGANLIMX-UHFFFAOYSA-N,CC(C)C1=CC=C(C)C(=C1O)O,"Cymopyrocatechol -3-Isopropyl-6-methylpyrocatechol +p-cymene-2,3-diol 490-06-2 -p-cymene-2,3-diol +3-Isopropyl-6-methylpyrocatechol NSC 40567 Pyrocatechol, 2-isopropyl-6-methyl- BRN 2248022 UNII-93XFQ715UL +3-methyl-6-propan-2-ylbenzene-1,2-diol 93XFQ715UL p-Cymene-2,3-diol (7CI,8CI) -3-isopropyl-6-methylcatechol +3-isopropyl-6-methylbenzene-1,2-diol 3-Isopropyl-6-Methyl-Benzene-1,2-Diol NSC40567 -3-methyl-6-propan-2-ylbenzene-1,2-diol SCHEMBL1494556 1,2-Benzenediol, 3-methyl-6-(1-methylethyl)- (9CI) DTXSID10197652
--- a/test-data/output2.csv Fri Sep 25 14:32:24 2020 +0000 +++ b/test-data/output2.csv Wed Jan 13 11:17:53 2021 +0000 @@ -7,8 +7,8 @@ CYP2D6",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 2,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1cc(c(C)cc1O)O ","InChI=1S/C10H14O2/c1-6(2)8-5-9(11)7(3)4-10(8)12/h4-6,11-12H,1-3H3",OQIOHYHRGZNZCW-UHFFFAOYSA-N,CC(C)C1=CC(=C(C)C=C1O)O,"Thymohydroquinone +Thymoquinol Hydrothymoquinone -Thymoquinol 2217-60-9 p-Cymene-2,5-diol 1,4-Benzenediol, 2-methyl-5-(1-methylethyl)- @@ -18,10 +18,11 @@ UNII-1C2ICM1R8V BRN 2084452 1C2ICM1R8V +2-methyl-5-propan-2-ylbenzene-1,4-diol +2-methyl-5-(1-methylethyl)-1,4-benzenediol 2-ISOPROPYL-5-METHYLBENZENE-1,4-DIOL Thymohydrochinon Thymohydroquinone (I) -2-methyl-5-propan-2-ylbenzene-1,4-diol SCHEMBL69082 p-Cymene-2,5-diol (8CI) CHEMBL4204349 @@ -66,7 +67,8 @@ CYP2E1 CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 4,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1c(cc(C)cc1O)O -","InChI=1S/C10H14O2/c1-6(2)10-8(11)4-7(3)5-9(10)12/h4-6,11-12H,1-3H3",TUWRZVAMHVWRER-UHFFFAOYSA-N,CC(C)C1=C(C=C(C)C=C1O)O,SCHEMBL1494319,12310887.0,C10H14O2,166.099379688,2.198500000000001,0,0,0,BTM00004,BTM00004,Hydroxylation of benzene on carbon ortho to electron donating group,BTMR1045,"CYP1A2 +","InChI=1S/C10H14O2/c1-6(2)10-8(11)4-7(3)5-9(10)12/h4-6,11-12H,1-3H3",TUWRZVAMHVWRER-UHFFFAOYSA-N,CC(C)C1=C(C=C(C)C=C1O)O,"SCHEMBL1494319 +5-methyl-2-propan-2-ylbenzene-1,3-diol",12310887.0,C10H14O2,166.099379688,2.198500000000001,0,0,0,BTM00004,BTM00004,Hydroxylation of benzene on carbon ortho to electron donating group,BTMR1045,"CYP1A2 CYP2C8 CYP2C9 CYP2C19 @@ -74,19 +76,19 @@ CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 5,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(C)c(c1O)O ","InChI=1S/C10H14O2/c1-6(2)8-5-4-7(3)9(11)10(8)12/h4-6,11-12H,1-3H3",LYUBXLHGANLIMX-UHFFFAOYSA-N,CC(C)C1=CC=C(C)C(=C1O)O,"Cymopyrocatechol -3-Isopropyl-6-methylpyrocatechol +p-cymene-2,3-diol 490-06-2 -p-cymene-2,3-diol +3-Isopropyl-6-methylpyrocatechol NSC 40567 Pyrocatechol, 2-isopropyl-6-methyl- BRN 2248022 UNII-93XFQ715UL +3-methyl-6-propan-2-ylbenzene-1,2-diol 93XFQ715UL p-Cymene-2,3-diol (7CI,8CI) -3-isopropyl-6-methylcatechol +3-isopropyl-6-methylbenzene-1,2-diol 3-Isopropyl-6-Methyl-Benzene-1,2-Diol NSC40567 -3-methyl-6-propan-2-ylbenzene-1,2-diol SCHEMBL1494556 1,2-Benzenediol, 3-methyl-6-(1-methylethyl)- (9CI) DTXSID10197652 @@ -108,6 +110,7 @@ 6,CC(C)C1=CC=C(C)C=C1O,"Cc1ccc(C(C)CO)c(c1)O ","InChI=1S/C10H14O2/c1-7-3-4-9(8(2)6-11)10(12)5-7/h3-5,8,11-12H,6H2,1-2H3",CLJPRXFHCRIUKW-UHFFFAOYSA-N,C(C(C)C1=CC=C(C)C=C1O)O,"9-Hydroxythymol 61955-76-8 +p-cymene-3,8-diol p-Mentha-1,3,5-triene-3,9-diol",14432748.0,C10H14O2,166.099379688,1.5777000000000003,0,0,0,BTM00006,BTM00006,Hydroxylation of terminal methyl,BTMR1061,"CYP1A2 CYP2A6 CYP2B6 @@ -119,10 +122,11 @@ CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 7,CC(C)C1=CC=C(C)C=C1O,"C=C(C)c1ccc(C)cc1O ","InChI=1S/C10H12O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-6,11H,1H2,2-3H3",IHWFPRKZRRGTTI-UHFFFAOYSA-N,CC(=C)C1=CC=C(C)C=C1O,"8,9-Dehydrothymol +5-methyl-2-prop-1-en-2-ylphenol 2-isopropenyl-5-methylphenol 18612-99-2 Phenol, 5-methyl-2-(1-methylethenyl)- -5-methyl-2-prop-1-en-2-ylphenol +p-Cymen-8-en-3-ol m-Cresol, 6-isopropenyl- SCHEMBL686122 2-Isopropenyl-5-methyl-phenol
--- a/test-data/output3.csv Fri Sep 25 14:32:24 2020 +0000 +++ b/test-data/output3.csv Wed Jan 13 11:17:53 2021 +0000 @@ -7,8 +7,8 @@ CYP2D6",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 2,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1cc(c(C)cc1O)O ","InChI=1S/C10H14O2/c1-6(2)8-5-9(11)7(3)4-10(8)12/h4-6,11-12H,1-3H3",OQIOHYHRGZNZCW-UHFFFAOYSA-N,CC(C)C1=CC(=C(C)C=C1O)O,"Thymohydroquinone +Thymoquinol Hydrothymoquinone -Thymoquinol 2217-60-9 p-Cymene-2,5-diol 1,4-Benzenediol, 2-methyl-5-(1-methylethyl)- @@ -18,10 +18,11 @@ UNII-1C2ICM1R8V BRN 2084452 1C2ICM1R8V +2-methyl-5-propan-2-ylbenzene-1,4-diol +2-methyl-5-(1-methylethyl)-1,4-benzenediol 2-ISOPROPYL-5-METHYLBENZENE-1,4-DIOL Thymohydrochinon Thymohydroquinone (I) -2-methyl-5-propan-2-ylbenzene-1,4-diol SCHEMBL69082 p-Cymene-2,5-diol (8CI) CHEMBL4204349 @@ -68,6 +69,7 @@ 6,CC(C)C1=CC=C(C)C=C1O,"Cc1ccc(C(C)CO)c(c1)O ","InChI=1S/C10H14O2/c1-7-3-4-9(8(2)6-11)10(12)5-7/h3-5,8,11-12H,6H2,1-2H3",CLJPRXFHCRIUKW-UHFFFAOYSA-N,C(C(C)C1=CC=C(C)C=C1O)O,"9-Hydroxythymol 61955-76-8 +p-cymene-3,8-diol p-Mentha-1,3,5-triene-3,9-diol",14432748.0,C10H14O2,166.099379688,1.5777000000000003,0,0,0,BTM00006,BTM00006,Hydroxylation of terminal methyl,BTMR1061,"CYP1A2 CYP2A6 CYP2B6 @@ -79,10 +81,11 @@ CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 7,CC(C)C1=CC=C(C)C=C1O,"C=C(C)c1ccc(C)cc1O ","InChI=1S/C10H12O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-6,11H,1H2,2-3H3",IHWFPRKZRRGTTI-UHFFFAOYSA-N,CC(=C)C1=CC=C(C)C=C1O,"8,9-Dehydrothymol +5-methyl-2-prop-1-en-2-ylphenol 2-isopropenyl-5-methylphenol 18612-99-2 Phenol, 5-methyl-2-(1-methylethenyl)- -5-methyl-2-prop-1-en-2-ylphenol +p-Cymen-8-en-3-ol m-Cresol, 6-isopropenyl- SCHEMBL686122 2-Isopropenyl-5-methyl-phenol
--- a/wrapper_biotransformer.py Fri Sep 25 14:32:24 2020 +0000 +++ b/wrapper_biotransformer.py Wed Jan 13 11:17:53 2021 +0000 @@ -1,9 +1,11 @@ import subprocess import sys import tempfile - +import re import pandas -from openbabel import pybel + +from openbabel import openbabel, pybel +openbabel.obErrorLog.StopLogging() # function for translating inchi to smiles @@ -46,17 +48,24 @@ smList3 = [] for _, (smiles,) in in_df.iterrows(): with tempfile.NamedTemporaryFile() as out: - subprocess.run(executable + argv + ["-ismi", smiles] + ["-ocsv", out.name]) - tmp2 = pandas.read_csv(out.name) - tmp3 = pandas.read_csv(out.name) - tmp2.drop_duplicates(inplace=True, subset=["InChI", "InChIKey", "Synonyms", "Molecular formula", "Major Isotope Mass", "ALogP"]) - tmp3.drop_duplicates(inplace=True, subset=["Molecular formula", "Major Isotope Mass", "ALogP"]) - smList2.append([smiles] * tmp2.shape[0]) - smList3.append([smiles] * tmp3.shape[0]) - out_df1 = pandas.concat([out_df1, pandas.read_csv(out.name)]) - out_df2 = pandas.concat([out_df2, tmp2]) - out_df3 = pandas.concat([out_df3, tmp3]) - smList1.append([smiles] * pandas.read_csv(out.name).shape[0]) + print("Working on compound: " + smiles) + if not re.search(r'\.', smiles): + subprocess.run(executable + argv + ["-ismi", smiles] + ["-ocsv", out.name]) + try: + tmp2 = pandas.read_csv(out.name) + tmp3 = pandas.read_csv(out.name) + tmp2.drop_duplicates(inplace=True, subset=["InChI", "InChIKey", "Synonyms", "Molecular formula", "Major Isotope Mass", "ALogP"]) + tmp3.drop_duplicates(inplace=True, subset=["Molecular formula", "Major Isotope Mass", "ALogP"]) + smList2.append([smiles] * tmp2.shape[0]) + smList3.append([smiles] * tmp3.shape[0]) + out_df1 = pandas.concat([out_df1, pandas.read_csv(out.name)]) + out_df2 = pandas.concat([out_df2, tmp2]) + out_df3 = pandas.concat([out_df3, tmp3]) + smList1.append([smiles] * pandas.read_csv(out.name).shape[0]) + except pandas.errors.EmptyDataError: + continue + else: + print("ERROR: Input compound cannot be a mixture.") smList1 = sum(smList1, []) # merge sublists into one list smList2 = sum(smList2, []) smList3 = sum(smList3, [])