changeset 3:6080aee7c4f6 draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 5cdd2628a1a509b3e0ccc599eaab63d664bf031a"
author recetox
date Wed, 13 Jan 2021 11:17:53 +0000
parents 3998017c374b
children 77f693bb14ac
files biotransformer.xml test-data/output1.csv test-data/output2.csv test-data/output3.csv wrapper_biotransformer.py
diffstat 5 files changed, 62 insertions(+), 42 deletions(-) [+]
line wrap: on
line diff
--- a/biotransformer.xml	Fri Sep 25 14:32:24 2020 +0000
+++ b/biotransformer.xml	Wed Jan 13 11:17:53 2021 +0000
@@ -1,4 +1,4 @@
-<tool id="biotransformer" name="BioTransformer" version="1.1.5+galaxy0">
+<tool id="biotransformer" name="BioTransformer" version="1.1.5+galaxy1">
   <requirements>
      <requirement type="package" version="1.1.5">biotransformer</requirement>
      <requirement type="package" version="3.1.1">openbabel</requirement>
@@ -73,17 +73,17 @@
 
 Parameters explanation:
 
-Input. Currently, only a CSV file with one SMILES per line is accepted.
+**Input.** Currently, only a CSV file with one SMILES per line is accepted.
 
-The type of description: Type of biotransformer - EC-based (ecbased), CYP450 (cyp450), Phase II (phaseII), Human gut
+**The type of prediction:** EC-based (ecbased), CYP450 (cyp450), Phase II (phaseII), Human gut
 microbial (hgut), human super transformer* (superbio, or allHuman), Environmental microbial (envimicro).
 
-The number of steps for the prediction. This option will be used for the EC-based, CYP450, Phase II, and Environmental
+**The number of steps for the prediction:** this option will be used for the EC-based, CYP450, Phase II, and Environmental
 microbial biotransformers. The default value is 1.
 
-Mass tolerance for metabolite identification (default is 0.01).
+**Mass tolerance for metabolite identification** (default is 0.01).
 
-Output of BioTransformer with CSV as an input are 3 CSV files. One without any filtering, second with filtered
+**Output of BioTransformer** with a CSV input consists of 3 CSV files: one without any filtering, a second with filtered
 duplicates based on 6 columns (InChI, InChIKey, Synonyms, Molecular formula, Major Isotope Mass, AlogP) and third with
 filtered duplicates based on 3 columns (Molecular formula, Major Isotope Mass, AlogP).
 
--- a/test-data/output1.csv	Fri Sep 25 14:32:24 2020 +0000
+++ b/test-data/output1.csv	Wed Jan 13 11:17:53 2021 +0000
@@ -13,8 +13,8 @@
 CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044
 2,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1cc(c(C)cc1O)O	
 ","InChI=1S/C10H14O2/c1-6(2)8-5-9(11)7(3)4-10(8)12/h4-6,11-12H,1-3H3",OQIOHYHRGZNZCW-UHFFFAOYSA-N,CC(C)C1=CC(=C(C)C=C1O)O,"Thymohydroquinone
+Thymoquinol
 Hydrothymoquinone
-Thymoquinol
 2217-60-9
 p-Cymene-2,5-diol
 1,4-Benzenediol, 2-methyl-5-(1-methylethyl)-
@@ -24,10 +24,11 @@
 UNII-1C2ICM1R8V
 BRN 2084452
 1C2ICM1R8V
+2-methyl-5-propan-2-ylbenzene-1,4-diol
+2-methyl-5-(1-methylethyl)-1,4-benzenediol
 2-ISOPROPYL-5-METHYLBENZENE-1,4-DIOL
 Thymohydrochinon
 Thymohydroquinone (I)
-2-methyl-5-propan-2-ylbenzene-1,4-diol
 SCHEMBL69082
 p-Cymene-2,5-diol (8CI)
 CHEMBL4204349
@@ -72,7 +73,8 @@
 CYP2E1
 CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044
 4,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1c(cc(C)cc1O)O	
-","InChI=1S/C10H14O2/c1-6(2)10-8(11)4-7(3)5-9(10)12/h4-6,11-12H,1-3H3",TUWRZVAMHVWRER-UHFFFAOYSA-N,CC(C)C1=C(C=C(C)C=C1O)O,SCHEMBL1494319,12310887.0,C10H14O2,166.099379688,2.198500000000001,0,0,0,BTM00004,BTM00004,Hydroxylation of benzene on carbon ortho to electron donating group,BTMR1045,"CYP1A2
+","InChI=1S/C10H14O2/c1-6(2)10-8(11)4-7(3)5-9(10)12/h4-6,11-12H,1-3H3",TUWRZVAMHVWRER-UHFFFAOYSA-N,CC(C)C1=C(C=C(C)C=C1O)O,"SCHEMBL1494319
+5-methyl-2-propan-2-ylbenzene-1,3-diol",12310887.0,C10H14O2,166.099379688,2.198500000000001,0,0,0,BTM00004,BTM00004,Hydroxylation of benzene on carbon ortho to electron donating group,BTMR1045,"CYP1A2
 CYP2C8
 CYP2C9
 CYP2C19
@@ -80,19 +82,19 @@
 CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044
 5,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(C)c(c1O)O	
 ","InChI=1S/C10H14O2/c1-6(2)8-5-4-7(3)9(11)10(8)12/h4-6,11-12H,1-3H3",LYUBXLHGANLIMX-UHFFFAOYSA-N,CC(C)C1=CC=C(C)C(=C1O)O,"Cymopyrocatechol
-3-Isopropyl-6-methylpyrocatechol
+p-cymene-2,3-diol
 490-06-2
-p-cymene-2,3-diol
+3-Isopropyl-6-methylpyrocatechol
 NSC 40567
 Pyrocatechol, 2-isopropyl-6-methyl-
 BRN 2248022
 UNII-93XFQ715UL
+3-methyl-6-propan-2-ylbenzene-1,2-diol
 93XFQ715UL
 p-Cymene-2,3-diol (7CI,8CI)
-3-isopropyl-6-methylcatechol
+3-isopropyl-6-methylbenzene-1,2-diol
 3-Isopropyl-6-Methyl-Benzene-1,2-Diol
 NSC40567
-3-methyl-6-propan-2-ylbenzene-1,2-diol
 SCHEMBL1494556
 1,2-Benzenediol, 3-methyl-6-(1-methylethyl)- (9CI)
 DTXSID10197652
@@ -114,6 +116,7 @@
 6,CC(C)C1=CC=C(C)C=C1O,"Cc1ccc(C(C)CO)c(c1)O	
 ","InChI=1S/C10H14O2/c1-7-3-4-9(8(2)6-11)10(12)5-7/h3-5,8,11-12H,6H2,1-2H3",CLJPRXFHCRIUKW-UHFFFAOYSA-N,C(C(C)C1=CC=C(C)C=C1O)O,"9-Hydroxythymol
 61955-76-8
+p-cymene-3,8-diol
 p-Mentha-1,3,5-triene-3,9-diol",14432748.0,C10H14O2,166.099379688,1.5777000000000003,0,0,0,BTM00006,BTM00006,Hydroxylation of terminal methyl,BTMR1061,"CYP1A2
 CYP2A6
 CYP2B6
@@ -125,10 +128,11 @@
 CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044
 7,CC(C)C1=CC=C(C)C=C1O,"C=C(C)c1ccc(C)cc1O	
 ","InChI=1S/C10H12O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-6,11H,1H2,2-3H3",IHWFPRKZRRGTTI-UHFFFAOYSA-N,CC(=C)C1=CC=C(C)C=C1O,"8,9-Dehydrothymol
+5-methyl-2-prop-1-en-2-ylphenol
 2-isopropenyl-5-methylphenol
 18612-99-2
 Phenol, 5-methyl-2-(1-methylethenyl)-
-5-methyl-2-prop-1-en-2-ylphenol
+p-Cymen-8-en-3-ol
 m-Cresol, 6-isopropenyl-
 SCHEMBL686122
 2-Isopropenyl-5-methyl-phenol
@@ -142,19 +146,19 @@
 CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044
 8,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(C)c(c1O)O	
 ","InChI=1S/C10H14O2/c1-6(2)8-5-4-7(3)9(11)10(8)12/h4-6,11-12H,1-3H3",LYUBXLHGANLIMX-UHFFFAOYSA-N,CC(C)C1=CC=C(C)C(=C1O)O,"Cymopyrocatechol
-3-Isopropyl-6-methylpyrocatechol
+p-cymene-2,3-diol
 490-06-2
-p-cymene-2,3-diol
+3-Isopropyl-6-methylpyrocatechol
 NSC 40567
 Pyrocatechol, 2-isopropyl-6-methyl-
 BRN 2248022
 UNII-93XFQ715UL
+3-methyl-6-propan-2-ylbenzene-1,2-diol
 93XFQ715UL
 p-Cymene-2,3-diol (7CI,8CI)
-3-isopropyl-6-methylcatechol
+3-isopropyl-6-methylbenzene-1,2-diol
 3-Isopropyl-6-Methyl-Benzene-1,2-Diol
 NSC40567
-3-methyl-6-propan-2-ylbenzene-1,2-diol
 SCHEMBL1494556
 1,2-Benzenediol, 3-methyl-6-(1-methylethyl)- (9CI)
 DTXSID10197652
--- a/test-data/output2.csv	Fri Sep 25 14:32:24 2020 +0000
+++ b/test-data/output2.csv	Wed Jan 13 11:17:53 2021 +0000
@@ -7,8 +7,8 @@
 CYP2D6",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044
 2,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1cc(c(C)cc1O)O	
 ","InChI=1S/C10H14O2/c1-6(2)8-5-9(11)7(3)4-10(8)12/h4-6,11-12H,1-3H3",OQIOHYHRGZNZCW-UHFFFAOYSA-N,CC(C)C1=CC(=C(C)C=C1O)O,"Thymohydroquinone
+Thymoquinol
 Hydrothymoquinone
-Thymoquinol
 2217-60-9
 p-Cymene-2,5-diol
 1,4-Benzenediol, 2-methyl-5-(1-methylethyl)-
@@ -18,10 +18,11 @@
 UNII-1C2ICM1R8V
 BRN 2084452
 1C2ICM1R8V
+2-methyl-5-propan-2-ylbenzene-1,4-diol
+2-methyl-5-(1-methylethyl)-1,4-benzenediol
 2-ISOPROPYL-5-METHYLBENZENE-1,4-DIOL
 Thymohydrochinon
 Thymohydroquinone (I)
-2-methyl-5-propan-2-ylbenzene-1,4-diol
 SCHEMBL69082
 p-Cymene-2,5-diol (8CI)
 CHEMBL4204349
@@ -66,7 +67,8 @@
 CYP2E1
 CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044
 4,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1c(cc(C)cc1O)O	
-","InChI=1S/C10H14O2/c1-6(2)10-8(11)4-7(3)5-9(10)12/h4-6,11-12H,1-3H3",TUWRZVAMHVWRER-UHFFFAOYSA-N,CC(C)C1=C(C=C(C)C=C1O)O,SCHEMBL1494319,12310887.0,C10H14O2,166.099379688,2.198500000000001,0,0,0,BTM00004,BTM00004,Hydroxylation of benzene on carbon ortho to electron donating group,BTMR1045,"CYP1A2
+","InChI=1S/C10H14O2/c1-6(2)10-8(11)4-7(3)5-9(10)12/h4-6,11-12H,1-3H3",TUWRZVAMHVWRER-UHFFFAOYSA-N,CC(C)C1=C(C=C(C)C=C1O)O,"SCHEMBL1494319
+5-methyl-2-propan-2-ylbenzene-1,3-diol",12310887.0,C10H14O2,166.099379688,2.198500000000001,0,0,0,BTM00004,BTM00004,Hydroxylation of benzene on carbon ortho to electron donating group,BTMR1045,"CYP1A2
 CYP2C8
 CYP2C9
 CYP2C19
@@ -74,19 +76,19 @@
 CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044
 5,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(C)c(c1O)O	
 ","InChI=1S/C10H14O2/c1-6(2)8-5-4-7(3)9(11)10(8)12/h4-6,11-12H,1-3H3",LYUBXLHGANLIMX-UHFFFAOYSA-N,CC(C)C1=CC=C(C)C(=C1O)O,"Cymopyrocatechol
-3-Isopropyl-6-methylpyrocatechol
+p-cymene-2,3-diol
 490-06-2
-p-cymene-2,3-diol
+3-Isopropyl-6-methylpyrocatechol
 NSC 40567
 Pyrocatechol, 2-isopropyl-6-methyl-
 BRN 2248022
 UNII-93XFQ715UL
+3-methyl-6-propan-2-ylbenzene-1,2-diol
 93XFQ715UL
 p-Cymene-2,3-diol (7CI,8CI)
-3-isopropyl-6-methylcatechol
+3-isopropyl-6-methylbenzene-1,2-diol
 3-Isopropyl-6-Methyl-Benzene-1,2-Diol
 NSC40567
-3-methyl-6-propan-2-ylbenzene-1,2-diol
 SCHEMBL1494556
 1,2-Benzenediol, 3-methyl-6-(1-methylethyl)- (9CI)
 DTXSID10197652
@@ -108,6 +110,7 @@
 6,CC(C)C1=CC=C(C)C=C1O,"Cc1ccc(C(C)CO)c(c1)O	
 ","InChI=1S/C10H14O2/c1-7-3-4-9(8(2)6-11)10(12)5-7/h3-5,8,11-12H,6H2,1-2H3",CLJPRXFHCRIUKW-UHFFFAOYSA-N,C(C(C)C1=CC=C(C)C=C1O)O,"9-Hydroxythymol
 61955-76-8
+p-cymene-3,8-diol
 p-Mentha-1,3,5-triene-3,9-diol",14432748.0,C10H14O2,166.099379688,1.5777000000000003,0,0,0,BTM00006,BTM00006,Hydroxylation of terminal methyl,BTMR1061,"CYP1A2
 CYP2A6
 CYP2B6
@@ -119,10 +122,11 @@
 CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044
 7,CC(C)C1=CC=C(C)C=C1O,"C=C(C)c1ccc(C)cc1O	
 ","InChI=1S/C10H12O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-6,11H,1H2,2-3H3",IHWFPRKZRRGTTI-UHFFFAOYSA-N,CC(=C)C1=CC=C(C)C=C1O,"8,9-Dehydrothymol
+5-methyl-2-prop-1-en-2-ylphenol
 2-isopropenyl-5-methylphenol
 18612-99-2
 Phenol, 5-methyl-2-(1-methylethenyl)-
-5-methyl-2-prop-1-en-2-ylphenol
+p-Cymen-8-en-3-ol
 m-Cresol, 6-isopropenyl-
 SCHEMBL686122
 2-Isopropenyl-5-methyl-phenol
--- a/test-data/output3.csv	Fri Sep 25 14:32:24 2020 +0000
+++ b/test-data/output3.csv	Wed Jan 13 11:17:53 2021 +0000
@@ -7,8 +7,8 @@
 CYP2D6",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044
 2,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1cc(c(C)cc1O)O	
 ","InChI=1S/C10H14O2/c1-6(2)8-5-9(11)7(3)4-10(8)12/h4-6,11-12H,1-3H3",OQIOHYHRGZNZCW-UHFFFAOYSA-N,CC(C)C1=CC(=C(C)C=C1O)O,"Thymohydroquinone
+Thymoquinol
 Hydrothymoquinone
-Thymoquinol
 2217-60-9
 p-Cymene-2,5-diol
 1,4-Benzenediol, 2-methyl-5-(1-methylethyl)-
@@ -18,10 +18,11 @@
 UNII-1C2ICM1R8V
 BRN 2084452
 1C2ICM1R8V
+2-methyl-5-propan-2-ylbenzene-1,4-diol
+2-methyl-5-(1-methylethyl)-1,4-benzenediol
 2-ISOPROPYL-5-METHYLBENZENE-1,4-DIOL
 Thymohydrochinon
 Thymohydroquinone (I)
-2-methyl-5-propan-2-ylbenzene-1,4-diol
 SCHEMBL69082
 p-Cymene-2,5-diol (8CI)
 CHEMBL4204349
@@ -68,6 +69,7 @@
 6,CC(C)C1=CC=C(C)C=C1O,"Cc1ccc(C(C)CO)c(c1)O	
 ","InChI=1S/C10H14O2/c1-7-3-4-9(8(2)6-11)10(12)5-7/h3-5,8,11-12H,6H2,1-2H3",CLJPRXFHCRIUKW-UHFFFAOYSA-N,C(C(C)C1=CC=C(C)C=C1O)O,"9-Hydroxythymol
 61955-76-8
+p-cymene-3,8-diol
 p-Mentha-1,3,5-triene-3,9-diol",14432748.0,C10H14O2,166.099379688,1.5777000000000003,0,0,0,BTM00006,BTM00006,Hydroxylation of terminal methyl,BTMR1061,"CYP1A2
 CYP2A6
 CYP2B6
@@ -79,10 +81,11 @@
 CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044
 7,CC(C)C1=CC=C(C)C=C1O,"C=C(C)c1ccc(C)cc1O	
 ","InChI=1S/C10H12O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-6,11H,1H2,2-3H3",IHWFPRKZRRGTTI-UHFFFAOYSA-N,CC(=C)C1=CC=C(C)C=C1O,"8,9-Dehydrothymol
+5-methyl-2-prop-1-en-2-ylphenol
 2-isopropenyl-5-methylphenol
 18612-99-2
 Phenol, 5-methyl-2-(1-methylethenyl)-
-5-methyl-2-prop-1-en-2-ylphenol
+p-Cymen-8-en-3-ol
 m-Cresol, 6-isopropenyl-
 SCHEMBL686122
 2-Isopropenyl-5-methyl-phenol
--- a/wrapper_biotransformer.py	Fri Sep 25 14:32:24 2020 +0000
+++ b/wrapper_biotransformer.py	Wed Jan 13 11:17:53 2021 +0000
@@ -1,9 +1,11 @@
 import subprocess
 import sys
 import tempfile
-
+import re
 import pandas
-from openbabel import pybel
+
+from openbabel import openbabel, pybel
+openbabel.obErrorLog.StopLogging()
 
 
 # function for translating inchi to smiles
@@ -46,17 +48,24 @@
     smList3 = []
     for _, (smiles,) in in_df.iterrows():
         with tempfile.NamedTemporaryFile() as out:
-            subprocess.run(executable + argv + ["-ismi", smiles] + ["-ocsv", out.name])
-            tmp2 = pandas.read_csv(out.name)
-            tmp3 = pandas.read_csv(out.name)
-            tmp2.drop_duplicates(inplace=True, subset=["InChI", "InChIKey", "Synonyms", "Molecular formula", "Major Isotope Mass", "ALogP"])
-            tmp3.drop_duplicates(inplace=True, subset=["Molecular formula", "Major Isotope Mass", "ALogP"])
-            smList2.append([smiles] * tmp2.shape[0])
-            smList3.append([smiles] * tmp3.shape[0])
-            out_df1 = pandas.concat([out_df1, pandas.read_csv(out.name)])
-            out_df2 = pandas.concat([out_df2, tmp2])
-            out_df3 = pandas.concat([out_df3, tmp3])
-            smList1.append([smiles] * pandas.read_csv(out.name).shape[0])
+            print("Working on compound: " + smiles)
+            if not re.search(r'\.', smiles):
+                subprocess.run(executable + argv + ["-ismi", smiles] + ["-ocsv", out.name])
+                try:
+                    tmp2 = pandas.read_csv(out.name)
+                    tmp3 = pandas.read_csv(out.name)
+                    tmp2.drop_duplicates(inplace=True, subset=["InChI", "InChIKey", "Synonyms", "Molecular formula", "Major Isotope Mass", "ALogP"])
+                    tmp3.drop_duplicates(inplace=True, subset=["Molecular formula", "Major Isotope Mass", "ALogP"])
+                    smList2.append([smiles] * tmp2.shape[0])
+                    smList3.append([smiles] * tmp3.shape[0])
+                    out_df1 = pandas.concat([out_df1, pandas.read_csv(out.name)])
+                    out_df2 = pandas.concat([out_df2, tmp2])
+                    out_df3 = pandas.concat([out_df3, tmp3])
+                    smList1.append([smiles] * pandas.read_csv(out.name).shape[0])
+                except pandas.errors.EmptyDataError:
+                    continue
+            else:
+                print("ERROR: Input compound cannot be a mixture.")
     smList1 = sum(smList1, [])  # merge sublists into one list
     smList2 = sum(smList2, [])
     smList3 = sum(smList3, [])