Mercurial > repos > recetox > biotransformer
changeset 4:77f693bb14ac draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 9a2276670c6ebf147ccd2cdd1cc54b306af3d20c"
author | recetox |
---|---|
date | Mon, 11 Apr 2022 10:09:39 +0000 |
parents | 6080aee7c4f6 |
children | c0fe7ad30ade |
files | biotransformer.xml macros.xml test-data/output1.csv test-data/output1.tsv test-data/output2.csv test-data/output2.tsv test-data/output3.csv test-data/output3.tsv wrapper_biotransformer.py |
diffstat | 9 files changed, 545 insertions(+), 695 deletions(-) [+] |
line wrap: on
line diff
--- a/biotransformer.xml Wed Jan 13 11:17:53 2021 +0000 +++ b/biotransformer.xml Mon Apr 11 10:09:39 2022 +0000 @@ -1,97 +1,70 @@ -<tool id="biotransformer" name="BioTransformer" version="1.1.5+galaxy1"> - <requirements> - <requirement type="package" version="1.1.5">biotransformer</requirement> - <requirement type="package" version="3.1.1">openbabel</requirement> - <requirement type="package" version="1.1.1">pandas</requirement> - <requirement type="package" version="3.7">python</requirement> - </requirements> - <command><![CDATA[ python3 '${__tool_directory__}/wrapper_biotransformer.py' - -k $k - -b $b - -s $steps - -t $tolerance - -a - -icsv $input_file - -ocsv $output_file - -ocsvDup $output_file2 - -ocsvDup2 $output_file3 - - ]]> - </command> - <inputs> - <param name="k" type="select" value="pred" label="The task to be permed [pred=prediction, cid=compound identification]."> - <option value="pred" selected="true" >pred</option> - <option value="cid">cid</option> - </param> - <param name="b" type="select" value="ecbased" label="The type of description."> - <option value="ecbased" selected="true" >EC-based</option> - <option value="cyp450">CYP450</option> - <option value="phaseII">Phase II</option> - <option value="hgut">Human gut microbial</option> - <option value="superbio">Human super transformer 1 (superbio)</option> - <option value="allHuman">Human super transformer 2 (allHuman)</option> - <option value="envimicro">Environmental microbial</option> - </param> - <param name="steps" type="integer" value="1" label=" The number of steps for the prediction."/> - <param name="tolerance" type="float" value="0.01" label="Mass tolerance for metabolite identification."/> - <param name="input_file" type="data" format="csv" label="Input CSV file."/> - </inputs> - - <outputs> - <data format="csv" name="output_file" label="BioTransformer - output."/> - <data format="csv" name="output_file2" label="BioTransformer - filtered CSV output."/> - <data format="csv" name="output_file3" label="BioTransformer - super-filtered CSV output."/> - </outputs> +<tool id="biotransformer" name="BioTransformer" version="@TOOL_VERSION@+galaxy1"> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="creator"/> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">biotransformer</requirement> + <requirement type="package" version="3.1.1">openbabel</requirement> + <requirement type="package" version="1.1.1">pandas</requirement> + </requirements> + <command detect_errors="aggressive"><![CDATA[ + python3 '${__tool_directory__}/wrapper_biotransformer.py' + -k $k + -b $b + -s $steps + -t $tolerance + -a + -icsv $input_file + -ocsv $output_file + -ocsvDup $output_file2 + -ocsvDup2 $output_file3 + ]]> + </command> + <inputs> + <param name="k" type="select" value="pred" + label="The task to be permed [pred=prediction, cid=compound identification]."> + <option value="pred" selected="true">pred</option> + <option value="cid">cid</option> + </param> + <param name="b" type="select" value="ecbased" label="The type of description."> + <option value="ecbased" selected="true">EC-based</option> + <option value="cyp450">CYP450</option> + <option value="phaseII">Phase II</option> + <option value="hgut">Human gut microbial</option> + <option value="superbio">Human super transformer 1 (superbio)</option> + <option value="allHuman">Human super transformer 2 (allHuman)</option> + <option value="envimicro">Environmental microbial</option> + </param> + <param name="steps" type="integer" value="1" label=" The number of steps for the prediction."/> + <param name="tolerance" type="float" value="0.01" label="Mass tolerance for metabolite identification."/> + <param name="input_file" type="data" format="csv" label="Input CSV file."/> + </inputs> - <tests> - <test> - <param name="k" value="pred"/> - <param name="b" value="allHuman"/> - <param name="steps" value="1"/> - <param name="tolerance" value="0.01"/> - <param name="input_file" value="smiles.csv"/> - <output name="output_file" value="output1.csv"/> - <output name="output_file2" value="output2.csv"/> - <output name="output_file3" value="output3.csv"/> - </test> - </tests> - - <help> -BioTransformer is a software tool that predicts small molecule metabolism in mammals, their gut micr obiota, -as well as the soil/aquatic microbiota. BioTransformer also assists scientists in metabolite identification, -based on the metabolism prediction. - -BioTransformer is offered to the public as a freely acessible software package under the GNU License GPL v2.1. + <outputs> + <data format="tsv" name="output_file" label="BioTransformer on ${on_string}"/> + <data format="tsv" name="output_file2" label="BioTransformer with filter on ${on_string}"/> + <data format="tsv" name="output_file3" label="BioTransformer with super filter on ${on_string}"/> + </outputs> -Users are free to copy and redistribute the material in any medium or format. Moreover, they could modify, and -build upon the material under the condition that they must give appropriate credit, provide links to the license, -and indicate if changes were made. Furthermore, the above copyright notice and this permission notice must be -included. Use and re-distribution of the these resources, in whole or in part, for commercial purposes requires -explicit permission of the authors. We ask that all users of the BioTransformer software tool, the BioTransformer -web server, or BioTransformerDB to cite the BioTransformer reference in any resulting publications, and to -acknowledge the authors. - -Parameters explanation: - -**Input.** Currently, only a CSV file with one SMILES per line is accepted. - -**The type of prediction:** EC-based (ecbased), CYP450 (cyp450), Phase II (phaseII), Human gut -microbial (hgut), human super transformer* (superbio, or allHuman), Environmental microbial (envimicro). - -**The number of steps for the prediction:** this option will be used for the EC-based, CYP450, Phase II, and Environmental -microbial biotransformers. The default value is 1. - -**Mass tolerance for metabolite identification** (default is 0.01). - -**Output of BioTransformer** with CSV as an input are 3 CSV files. One without any filtering, second with filtered -duplicates based on 6 columns (InChI, InChIKey, Synonyms, Molecular formula, Major Isotope Mass, AlogP) and third with -filtered duplicates based on 3 columns (Molecular formula, Major Isotope Mass, AlogP). - -(* ) While the 'superbio' option runs a set number of transformation steps in a pre-defined order (e.g. deconjugation -first, then Oxidation/reduction, etc.), the 'allHuman' option predicts all possible metabolites from any applicable -reaction(Oxidation, reduction, (de-)conjugation) at each step. - </help> - <citations> - <citation type="doi">https://doi.org/10.1186/s13321-018-0324-5</citation> - </citations> + <tests> + <test> + <param name="k" value="pred"/> + <param name="b" value="allHuman"/> + <param name="steps" value="1"/> + <param name="tolerance" value="0.01"/> + <param name="input_file" value="smiles.csv"/> + <output name="output_file" value="output1.tsv"/> + <output name="output_file2" value="output2.tsv"/> + <output name="output_file3" value="output3.tsv"/> + </test> + </tests> + <help> + <![CDATA[ + @HELP@ + ]]> + </help> + <citations> + <citation type="doi">https://doi.org/10.1186/s13321-018-0324-5</citation> + </citations> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Mon Apr 11 10:09:39 2022 +0000 @@ -0,0 +1,57 @@ +<macros> + <token name="@TOOL_VERSION@">3.0</token> + <xml name="creator"> + <creator> + <person + givenName="Martin" + familyName="Čech" + url="https://github.com/martenson" + identifier="0000-0002-9318-1781" /> + <person + givenName="Karolína" + familyName="Trachtová" + url="https://github.com/trachtok" /> + <organization + url="https://www.recetox.muni.cz/" + email="GalaxyToolsDevelopmentandDeployment@space.muni.cz" + name="RECETOX MUNI" /> + </creator> + </xml> + <token name="@HELP@"> + <![CDATA[ + BioTransformer is a software tool that predicts small molecule metabolism in mammals, their gut micr obiota, + as well as the soil/aquatic microbiota. BioTransformer also assists scientists in metabolite identification, + based on the metabolism prediction. + + BioTransformer is offered to the public as a freely acessible software package under the GNU License GPL v3. + + Users are free to copy and redistribute the material in any medium or format. Moreover, they could modify, and + build upon the material under the condition that they must give appropriate credit, provide links to the license, + and indicate if changes were made. Furthermore, the above copyright notice and this permission notice must be + included. Use and re-distribution of the these resources, in whole or in part, for commercial purposes requires + explicit permission of the authors. We ask that all users of the BioTransformer software tool, the BioTransformer + web server, or BioTransformerDB to cite the BioTransformer reference in any resulting publications, and to + acknowledge the authors. + + Parameters explanation: + + **Input.** Currently, only a CSV file with one SMILES per line is accepted. + + **The type of prediction:** EC-based (ecbased), CYP450 (cyp450), Phase II (phaseII), Human gut + microbial (hgut), human super transformer* (superbio, or allHuman), Environmental microbial (envimicro). + + **The number of steps for the prediction:** this option will be used for the EC-based, CYP450, Phase II, and Environmental + microbial biotransformers. The default value is 1. + + **Mass tolerance for metabolite identification** (default is 0.01). + + **Output of BioTransformer** with CSV as an input are 3 TSV files. One without any filtering, second with filtered + duplicates based on 6 columns (InChI, InChIKey, Synonyms, Molecular formula, Major Isotope Mass, AlogP) and third with + filtered duplicates based on 3 columns (Molecular formula, Major Isotope Mass, AlogP). + + (* ) While the 'superbio' option runs a set number of transformation steps in a pre-defined order (e.g. deconjugation + first, then Oxidation/reduction, etc.), the 'allHuman' option predicts all possible metabolites from any applicable + reaction(Oxidation, reduction, (de-)conjugation) at each step. + ]]> + </token> +</macros>
--- a/test-data/output1.csv Wed Jan 13 11:17:53 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,249 +0,0 @@ -,SMILES query,SMILES target,InChI,InChIKey,SMILES,Synonyms,PUBCHEM_CID,Molecular formula,Major Isotope Mass,ALogP,Lipinski_Violations,Insecticide_Likeness_Violations,Post_Em_Herbicide_Likeness_Violations,Metabolite ID,cdk:Title,Reaction,Reaction ID,Enzyme(s),Biosystem,Precursor ID,Precursor SMILES,Precursor InChI,Precursor InChIKey,Precursor ALogP,Precursor Major Isotope Mass -0,CC(C)C1=CC=C(C)C=C1O,"Cc1ccc(c(c1)O)C(C)(C)O -","InChI=1S/C10H14O2/c1-7-4-5-8(9(11)6-7)10(2,3)12/h4-6,11-12H,1-3H3",UWRRYLNXMGBJKK-UHFFFAOYSA-N,CC(C)(C1=CC=C(C)C=C1O)O,,,C10H14O2,166.099379688,2.0267999999999997,0,0,0,BTM00001,BTM00001,Hydroxylation of penultimate aliphatic carbon adjacent to aromatic carbon,BTMR1076,"CYP1A2 -CYP2C8 -CYP2C9 -CYP2C19 -CYP2D6",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -1,CC(C)C1=CC=C(C)C=C1O,"Cc1ccc(c(c1)O)C(C)(C)O -","InChI=1S/C10H14O2/c1-7-4-5-8(9(11)6-7)10(2,3)12/h4-6,11-12H,1-3H3",UWRRYLNXMGBJKK-UHFFFAOYSA-N,CC(C)(C1=CC=C(C)C=C1O)O,,,C10H14O2,166.099379688,2.0267999999999997,0,0,0,BTM00001,BTM00001,Hydroxylation of non-terminal aliphatic carbon adjacent to aromatic ring,BTMR1077,"CYP1A2 -CYP2C8 -CYP2C9 -CYP2D6 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -2,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1cc(c(C)cc1O)O -","InChI=1S/C10H14O2/c1-6(2)8-5-9(11)7(3)4-10(8)12/h4-6,11-12H,1-3H3",OQIOHYHRGZNZCW-UHFFFAOYSA-N,CC(C)C1=CC(=C(C)C=C1O)O,"Thymohydroquinone -Thymoquinol -Hydrothymoquinone -2217-60-9 -p-Cymene-2,5-diol -1,4-Benzenediol, 2-methyl-5-(1-methylethyl)- -2-Methyl-5-isopropylhydroquinone -Hydroquinone, 5-isopropyl-2-methyl- -NSC 34803 -UNII-1C2ICM1R8V -BRN 2084452 -1C2ICM1R8V -2-methyl-5-propan-2-ylbenzene-1,4-diol -2-methyl-5-(1-methylethyl)-1,4-benzenediol -2-ISOPROPYL-5-METHYLBENZENE-1,4-DIOL -Thymohydrochinon -Thymohydroquinone (I) -SCHEMBL69082 -p-Cymene-2,5-diol (8CI) -CHEMBL4204349 -CTK8H6569 -DTXSID70176706 -WLN: QR DQ B1 EY1&1 -NSC34803 -1, 2-methyl-5-(1-methylethyl)- -NSC-34803 -AKOS006274324 -ZINC100292063 -MCULE-6916835293 -2-isopropyl-5-methyl-benzene-1,4-diol -LS-77299 -5-methyl-2-(methylethyl)benzene-1,4-diol -2-methyl-5-(propan-2-yl)benzene-1,4-diol -FT-0700031 -ST51045581 -IMW",95779.0,C10H14O2,166.099379688,2.198500000000001,0,0,0,BTM00002,BTM00002,p-Hydroxylation of phenol,BTMR1038,"CYP1A2 -CYP2A6 -CYP2B6 -CYP2C8 -CYP2C9 -CYP2C19 -CYP2D6 -CYP2E1 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -3,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(cc1O)CO -","InChI=1S/C10H14O2/c1-7(2)9-4-3-8(6-11)5-10(9)12/h3-5,7,11-12H,6H2,1-2H3",UNNQYEJIPIBHFS-UHFFFAOYSA-N,CC(C)C1=CC=C(CO)C=C1O,"77311-68-3 -CTK2G0272 -5-hydroxymethyl-2-isopropylphenol -DTXSID70554040 -2-Isopropyl-5-(hydroxymethyl)phenol -5-(Hydroxymethyl)-2-(propan-2-yl)phenol -Benzenemethanol, 3-hydroxy-4-(1-methylethyl)-",14002478.0,C10H14O2,166.099379688,1.3752000000000006,0,0,0,BTM00003,BTM00003,Aliphatic hydroxylation of methyl carbon adjacent to aromatic ring,BTMR1058,"CYP1A2 -CYP2A6 -CYP2B6 -CYP2C8 -CYP2C9 -CYP2C19 -CYP2D6 -CYP2E1 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -4,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1c(cc(C)cc1O)O -","InChI=1S/C10H14O2/c1-6(2)10-8(11)4-7(3)5-9(10)12/h4-6,11-12H,1-3H3",TUWRZVAMHVWRER-UHFFFAOYSA-N,CC(C)C1=C(C=C(C)C=C1O)O,"SCHEMBL1494319 -5-methyl-2-propan-2-ylbenzene-1,3-diol",12310887.0,C10H14O2,166.099379688,2.198500000000001,0,0,0,BTM00004,BTM00004,Hydroxylation of benzene on carbon ortho to electron donating group,BTMR1045,"CYP1A2 -CYP2C8 -CYP2C9 -CYP2C19 -CYP2D6 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -5,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(C)c(c1O)O -","InChI=1S/C10H14O2/c1-6(2)8-5-4-7(3)9(11)10(8)12/h4-6,11-12H,1-3H3",LYUBXLHGANLIMX-UHFFFAOYSA-N,CC(C)C1=CC=C(C)C(=C1O)O,"Cymopyrocatechol -p-cymene-2,3-diol -490-06-2 -3-Isopropyl-6-methylpyrocatechol -NSC 40567 -Pyrocatechol, 2-isopropyl-6-methyl- -BRN 2248022 -UNII-93XFQ715UL -3-methyl-6-propan-2-ylbenzene-1,2-diol -93XFQ715UL -p-Cymene-2,3-diol (7CI,8CI) -3-isopropyl-6-methylbenzene-1,2-diol -3-Isopropyl-6-Methyl-Benzene-1,2-Diol -NSC40567 -SCHEMBL1494556 -1,2-Benzenediol, 3-methyl-6-(1-methylethyl)- (9CI) -DTXSID10197652 -5722AF -NSC-40567 -ZINC96035800 -AKOS006275160 -MCULE-2488475103 -3-methyl-6-propan-2-yl-benzene-1,2-diol -LS-136440 -3-methyl-6-(propan-2-yl)benzene-1,2-diol -1,2-Benzenediol, 3-methyl-6-(1-methylethyl)- -A828568",95873.0,C10H14O2,166.099379688,2.198500000000001,0,0,0,BTM00005,BTM00005,Hydroxylation of benzene on carbon ortho to electron donating group,BTMR1045,"CYP1A2 -CYP2C8 -CYP2C9 -CYP2C19 -CYP2D6 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -6,CC(C)C1=CC=C(C)C=C1O,"Cc1ccc(C(C)CO)c(c1)O -","InChI=1S/C10H14O2/c1-7-3-4-9(8(2)6-11)10(12)5-7/h3-5,8,11-12H,6H2,1-2H3",CLJPRXFHCRIUKW-UHFFFAOYSA-N,C(C(C)C1=CC=C(C)C=C1O)O,"9-Hydroxythymol -61955-76-8 -p-cymene-3,8-diol -p-Mentha-1,3,5-triene-3,9-diol",14432748.0,C10H14O2,166.099379688,1.5777000000000003,0,0,0,BTM00006,BTM00006,Hydroxylation of terminal methyl,BTMR1061,"CYP1A2 -CYP2A6 -CYP2B6 -CYP2C8 -CYP2C9 -CYP2C19 -CYP2D6 -CYP2E1 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -7,CC(C)C1=CC=C(C)C=C1O,"C=C(C)c1ccc(C)cc1O -","InChI=1S/C10H12O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-6,11H,1H2,2-3H3",IHWFPRKZRRGTTI-UHFFFAOYSA-N,CC(=C)C1=CC=C(C)C=C1O,"8,9-Dehydrothymol -5-methyl-2-prop-1-en-2-ylphenol -2-isopropenyl-5-methylphenol -18612-99-2 -Phenol, 5-methyl-2-(1-methylethenyl)- -p-Cymen-8-en-3-ol -m-Cresol, 6-isopropenyl- -SCHEMBL686122 -2-Isopropenyl-5-methyl-phenol -CTK0E2283 -DTXSID60423892 -5-Methyl-2-(1-methylethenyl)phenol -5-Methyl-2-(prop-1-en-2-yl)phenol",6429037.0,C10H12O,148.088815004,3.0469000000000004,0,0,0,BTM00007,BTM00007,Terminal desaturation,BTMR1190,"CYP1A2 -CYP2A6 -CYP2C9 -CYP2D6 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -8,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(C)c(c1O)O -","InChI=1S/C10H14O2/c1-6(2)8-5-4-7(3)9(11)10(8)12/h4-6,11-12H,1-3H3",LYUBXLHGANLIMX-UHFFFAOYSA-N,CC(C)C1=CC=C(C)C(=C1O)O,"Cymopyrocatechol -p-cymene-2,3-diol -490-06-2 -3-Isopropyl-6-methylpyrocatechol -NSC 40567 -Pyrocatechol, 2-isopropyl-6-methyl- -BRN 2248022 -UNII-93XFQ715UL -3-methyl-6-propan-2-ylbenzene-1,2-diol -93XFQ715UL -p-Cymene-2,3-diol (7CI,8CI) -3-isopropyl-6-methylbenzene-1,2-diol -3-Isopropyl-6-Methyl-Benzene-1,2-Diol -NSC40567 -SCHEMBL1494556 -1,2-Benzenediol, 3-methyl-6-(1-methylethyl)- (9CI) -DTXSID10197652 -5722AF -NSC-40567 -ZINC96035800 -AKOS006275160 -MCULE-2488475103 -3-methyl-6-propan-2-yl-benzene-1,2-diol -LS-136440 -3-methyl-6-(propan-2-yl)benzene-1,2-diol -1,2-Benzenediol, 3-methyl-6-(1-methylethyl)- -A828568",95873.0,C10H14O2,166.099379688,2.198500000000001,0,0,0,BTM00005,BTM00005,O-Hydroxylation of phenol,BTMR1037,"CYP1A2 -CYP2A6 -CYP2B6 -CYP2C8 -CYP2C9 -CYP2C19 -CYP2D6 -CYP2E1 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -9,CC(C)C1=CC=C(C)C=C1O,"CC(C)C1=C(C=C(C)C2C1O2)O -","InChI=1S/C10H14O2/c1-5(2)8-7(11)4-6(3)9-10(8)12-9/h4-5,9-11H,1-3H3",DETRTAMOFLJUIH-UHFFFAOYSA-N,CC(C)C=1C2C(C(C)=CC1O)O2,,,C10H14O2,166.099379688,0.9412999999999996,0,0,0,BTM00008,BTM00008,Epoxidation of arene,BTMR1028,"CYP1A2 -CYP2B6 -CYP2C8 -CYP2C9 -CYP2C19 -CYP2E1 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -10,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(cc1O)CO -","InChI=1S/C10H14O2/c1-7(2)9-4-3-8(6-11)5-10(9)12/h3-5,7,11-12H,6H2,1-2H3",UNNQYEJIPIBHFS-UHFFFAOYSA-N,CC(C)C1=CC=C(CO)C=C1O,"77311-68-3 -CTK2G0272 -5-hydroxymethyl-2-isopropylphenol -DTXSID70554040 -2-Isopropyl-5-(hydroxymethyl)phenol -5-(Hydroxymethyl)-2-(propan-2-yl)phenol -Benzenemethanol, 3-hydroxy-4-(1-methylethyl)-",14002478.0,C10H14O2,166.099379688,1.3752000000000006,0,0,0,BTM00003,BTM00003,Allylic hydroxylation,BTMR0071,"CYP1A2 -CYP2B6 -CYP2C19 -CYP2D6 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -11,CC(C)C1=CC=C(C)C=C1O,"Cc1ccc(c(c1)O)C(C)(C)O -","InChI=1S/C10H14O2/c1-7-4-5-8(9(11)6-7)10(2,3)12/h4-6,11-12H,1-3H3",UWRRYLNXMGBJKK-UHFFFAOYSA-N,CC(C)(C1=CC=C(C)C=C1O)O,,,C10H14O2,166.099379688,2.0267999999999997,0,0,0,BTM00001,BTM00001,Allylic hydroxylation,BTMR0071,"CYP1A2 -CYP2B6 -CYP2C19 -CYP2D6 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -12,CC(C)C1=CC=C(C)C=C1O,"Cc1ccc(c(c1)O)C(C)(C)O -","InChI=1S/C10H14O2/c1-7-4-5-8(9(11)6-7)10(2,3)12/h4-6,11-12H,1-3H3",UWRRYLNXMGBJKK-UHFFFAOYSA-N,CC(C)(C1=CC=C(C)C=C1O)O,,,C10H14O2,166.099379688,2.0267999999999997,0,0,0,BTM00001,BTM00001,Hydroxylation of penultimate aliphatic tertiary carbon,BTMR1075,"CYP2A6 -CYP2B6 -CYP2C8 -CYP2C9 -CYP2D6 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -13,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(C)cc1OC1C(C(C(C(C(=O)O)O1)O)O)O -","InChI=1S/C16H22O7/c1-7(2)9-5-4-8(3)6-10(9)22-16-13(19)11(17)12(18)14(23-16)15(20)21/h4-7,11-14,16-19H,1-3H3,(H,20,21)",ADQJSAVCKZSGMK-UHFFFAOYSA-N,CC(C)C1=CC=C(C)C=C1OC2OC(C(O)C(O)C2O)C(O)=O,"NSC404789 -NSC-404789 -3,5-trihydroxy-6-(2-isopropyl-5-methyl-phenoxy)tetrahydro-2H-pyran-2-carboxylic acid",346530.0,C16H22O7,326.13655304400004,0.6636999999999996,0,1,1,BTM00009,BTM00009,Aromatic OH-glucuronidation,BTMR0166,Bacterial UDP-glucuronosyltransferase,GUTMICRO,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -14,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(C)cc1OC1C(C(C(C(C(=O)O)O1)O)O)O -","InChI=1S/C16H22O7/c1-7(2)9-5-4-8(3)6-10(9)22-16-13(19)11(17)12(18)14(23-16)15(20)21/h4-7,11-14,16-19H,1-3H3,(H,20,21)",ADQJSAVCKZSGMK-UHFFFAOYSA-N,CC(C)C1=CC=C(C)C=C1OC2OC(C(O)C(O)C2O)C(O)=O,"NSC404789 -NSC-404789 -3,5-trihydroxy-6-(2-isopropyl-5-methyl-phenoxy)tetrahydro-2H-pyran-2-carboxylic acid",346530.0,C16H22O7,326.13655304400004,0.6636999999999996,0,1,1,BTM00009,BTM00009,3-OH-Glucuronidation of a phenolic compound,BTMR1231,Bacterial UDP-glucuronosyltransferase,GUTMICRO,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -15,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(C)cc1OS(=O)(=O)O -","InChI=1S/C10H14O4S/c1-7(2)9-5-4-8(3)6-10(9)14-15(11,12)13/h4-7H,1-3H3,(H,11,12,13)",NODSEPOUFZPJEQ-UHFFFAOYSA-N,CC(C)C1=CC=C(C)C=C1OS(O)(=O)=O,"thymol sulfate -Thymol sulphate -Thymol sulfuric acid -Thymol sulphuric acid -SCHEMBL235717 -CHEBI:82911 -5-methyl-2-(propan-2-yl)phenyl hydrogen sulfate -Q27156452",12456386.0,C10H14O4S,230.061279928,2.0731,0,0,0,BTM00010,BTM00010,3-OH-Sulfonation of phenolic compound,BTMR0196,SULFOTRANSFERASE,GUTMICRO,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -16,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(C)cc1OC1C(C(C(C(C(=O)O)O1)O)O)O -","InChI=1S/C16H22O7/c1-7(2)9-5-4-8(3)6-10(9)22-16-13(19)11(17)12(18)14(23-16)15(20)21/h4-7,11-14,16-19H,1-3H3,(H,20,21)",ADQJSAVCKZSGMK-UHFFFAOYSA-N,CC(C)C1=CC=C(C)C=C1OC2OC(C(O)C(O)C2O)C(O)=O,"NSC404789 -NSC-404789 -3,5-trihydroxy-6-(2-isopropyl-5-methyl-phenoxy)tetrahydro-2H-pyran-2-carboxylic acid",346530.0,C16H22O7,326.13655304400004,0.6636999999999996,0,1,1,BTM00009,BTM00009,Aromatic OH-glucuronidation,BTMR0166,EC 2.4.1.17,HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -17,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(C)cc1OC1C(C(C(C(C(=O)O)O1)O)O)O -","InChI=1S/C16H22O7/c1-7(2)9-5-4-8(3)6-10(9)22-16-13(19)11(17)12(18)14(23-16)15(20)21/h4-7,11-14,16-19H,1-3H3,(H,20,21)",ADQJSAVCKZSGMK-UHFFFAOYSA-N,CC(C)C1=CC=C(C)C=C1OC2OC(C(O)C(O)C2O)C(O)=O,"NSC404789 -NSC-404789 -3,5-trihydroxy-6-(2-isopropyl-5-methyl-phenoxy)tetrahydro-2H-pyran-2-carboxylic acid",346530.0,C16H22O7,326.13655304400004,0.6636999999999996,0,1,1,BTM00009,BTM00009,3-OH-Glucuronidation of a phenolic compound,BTMR1231,EC 2.4.1.17,HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -18,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(C)cc1OS(=O)(=O)O -","InChI=1S/C10H14O4S/c1-7(2)9-5-4-8(3)6-10(9)14-15(11,12)13/h4-7H,1-3H3,(H,11,12,13)",NODSEPOUFZPJEQ-UHFFFAOYSA-N,CC(C)C1=CC=C(C)C=C1OS(O)(=O)=O,"thymol sulfate -Thymol sulphate -Thymol sulfuric acid -Thymol sulphuric acid -SCHEMBL235717 -CHEBI:82911 -5-methyl-2-(propan-2-yl)phenyl hydrogen sulfate -Q27156452",12456386.0,C10H14O4S,230.061279928,2.0731,0,0,0,BTM00010,BTM00010,3-OH-Sulfonation of phenolic compound,BTMR0196,EC 2.8.2.1,HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output1.tsv Mon Apr 11 10:09:39 2022 +0000 @@ -0,0 +1,137 @@ + SMILES query SMILES target InChI InChIKey SMILES Synonyms PUBCHEM_CID Molecular formula Major Isotope Mass ALogP Lipinski_Violations Insecticide_Likeness_Violations Post_Em_Herbicide_Likeness_Violations Metabolite ID cdk:Title Reaction Reaction ID Enzyme(s) Biosystem Precursor ID Precursor SMILES Precursor InChI Precursor InChIKey Precursor ALogP Precursor Major Isotope Mass +0 CC(C)C1=CC=C(C)C=C1O "CC(C)c1ccc(C)cc1OC1C(C(C(C(C(=O)O)O1)O)O)O +" InChI=1S/C16H22O7/c1-7(2)9-5-4-8(3)6-10(9)22-16-13(19)11(17)12(18)14(23-16)15(20)21/h4-7,11-14,16-19H,1-3H3,(H,20,21) ADQJSAVCKZSGMK-UHFFFAOYSA-N CC(C)C1=CC=C(C)C=C1OC2OC(C(O)C(O)C2O)C(O)=O "NSC404789 +NSC-404789 +(2S,3S,4S,5R)-3,4,5-trihydroxy-6-(5-methyl-2-propan-2-ylphenoxy)oxane-2-carboxylic acid +3,5-trihydroxy-6-(2-isopropyl-5-methyl-phenoxy)tetrahydro-2H-pyran-2-carboxylic acid" 346530.0 C16H22O7 326.13655304400004 0.6636999999999996 0 1 1 BTM00001 BTM00001 Aromatic OH-glucuronidation BTMR0166 EC 2.4.1.17 HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +1 CC(C)C1=CC=C(C)C=C1O "CC(C)c1ccc(C)cc1OS(=O)(=O)O +" InChI=1S/C10H14O4S/c1-7(2)9-5-4-8(3)6-10(9)14-15(11,12)13/h4-7H,1-3H3,(H,11,12,13) NODSEPOUFZPJEQ-UHFFFAOYSA-N CC(C)C1=CC=C(C)C=C1OS(O)(=O)=O "thymol sulfate +Thymol sulphate +Thymol sulfuric acid +Thymol sulphuric acid +SCHEMBL235717 +CHEBI:82911 +5-methyl-2-(propan-2-yl)phenyl hydrogen sulfate +Q27156452" 12456386.0 C10H14O4S 230.061279928 2.0731 0 0 0 BTM00002 BTM00002 3-OH-Sulfonation of phenolic compound BTMR0196 EC 2.8.2.1 HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +2 CC(C)C1=CC=C(C)C=C1O "Cc1ccc(c(c1)O)C(C)(C)O +" InChI=1S/C10H14O2/c1-7-4-5-8(9(11)6-7)10(2,3)12/h4-6,11-12H,1-3H3 UWRRYLNXMGBJKK-UHFFFAOYSA-N CC(C)(C1=CC=C(C)C=C1O)O SCHEMBL22652590 11332674.0 C10H14O2 166.099379688 2.0267999999999997 0 0 0 BTM00003 BTM00003 Hydroxylation of penultimate aliphatic carbon adjacent to aromatic carbon BTMR1076 "CYP1A2 +CYP2C8 +CYP2C9 +CYP2C19 +CYP2D6" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +3 CC(C)C1=CC=C(C)C=C1O "CC(C)c1cc(c(C)cc1O)O +" InChI=1S/C10H14O2/c1-6(2)8-5-9(11)7(3)4-10(8)12/h4-6,11-12H,1-3H3 OQIOHYHRGZNZCW-UHFFFAOYSA-N CC(C)C1=CC(=C(C)C=C1O)O "Thymohydroquinone +Thymoquinol +Hydrothymoquinone +2217-60-9 +1,4-Benzenediol, 2-methyl-5-(1-methylethyl)- +p-Cymene-2,5-diol +2-Methyl-5-isopropylhydroquinone +Hydroquinone, 5-isopropyl-2-methyl- +NSC 34803 +2-ISOPROPYL-5-METHYLBENZENE-1,4-DIOL +UNII-1C2ICM1R8V +1C2ICM1R8V +2-methyl-5-(1-methylethyl)-1,4-benzenediol +BRN 2084452 +Thymohydrochinon +Thymohydroquinone (I) +2-methyl-5-propan-2-ylbenzene-1,4-diol +SCHEMBL69082 +p-Cymene-2,5-diol (8CI) +CHEMBL4204349 +DTXSID70176706 +WLN: QR DQ B1 EY1&1 +NSC34803 +1, 2-methyl-5-(1-methylethyl)- +NSC-34803 +AKOS006274324 +ZINC100292063 +MCULE-6916835293 +2-isopropyl-5-methyl-benzene-1,4-diol +2-methyl-5-(propan-2-yl)benzene-1,4-diol +FT-0700031 +IMW" 95779.0 C10H14O2 166.099379688 2.198500000000001 0 0 0 BTM00004 BTM00004 p-Hydroxylation of phenol BTMR1038 "CYP1A2 +CYP2A6 +CYP2B6 +CYP2C8 +CYP2C9 +CYP2C19 +CYP2D6 +CYP2E1 +CYP3A4" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +4 CC(C)C1=CC=C(C)C=C1O "CC(C)c1ccc(cc1O)CO +" InChI=1S/C10H14O2/c1-7(2)9-4-3-8(6-11)5-10(9)12/h3-5,7,11-12H,6H2,1-2H3 UNNQYEJIPIBHFS-UHFFFAOYSA-N CC(C)C1=CC=C(CO)C=C1O "77311-68-3 +5-hydroxymethyl-2-isopropylphenol +DTXSID70554040 +2-Isopropyl-5-(hydroxymethyl)phenol +5-(Hydroxymethyl)-2-(propan-2-yl)phenol" 14002478.0 C10H14O2 166.099379688 1.3752000000000006 0 0 0 BTM00005 BTM00005 Aliphatic hydroxylation of methyl carbon adjacent to aromatic ring BTMR1058 "CYP1A2 +CYP2A6 +CYP2B6 +CYP2C8 +CYP2C9 +CYP2C19 +CYP2D6 +CYP2E1 +CYP3A4" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +5 CC(C)C1=CC=C(C)C=C1O "CC(C)c1c(cc(C)cc1O)O +" InChI=1S/C10H14O2/c1-6(2)10-8(11)4-7(3)5-9(10)12/h4-6,11-12H,1-3H3 TUWRZVAMHVWRER-UHFFFAOYSA-N CC(C)C1=C(C=C(C)C=C1O)O SCHEMBL1494319 12310887.0 C10H14O2 166.099379688 2.198500000000001 0 0 0 BTM00006 BTM00006 Hydroxylation of benzene on carbon ortho to electron donating group BTMR1045 "CYP1A2 +CYP2C8 +CYP2C9 +CYP2C19 +CYP2D6 +CYP3A4" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +6 CC(C)C1=CC=C(C)C=C1O "CC(C)c1ccc(C)c(c1O)O +" InChI=1S/C10H14O2/c1-6(2)8-5-4-7(3)9(11)10(8)12/h4-6,11-12H,1-3H3 LYUBXLHGANLIMX-UHFFFAOYSA-N CC(C)C1=CC=C(C)C(=C1O)O "Cymopyrocatechol +490-06-2 +3-isopropyl-6-methylcatechol +p-cymene-2,3-diol +3-Isopropyl-6-methylpyrocatechol +Pyrocatechol, 2-isopropyl-6-methyl- +UNII-93XFQ715UL +93XFQ715UL +NSC 40567 +3-isopropyl-6-methylbenzene-1,2-diol +BRN 2248022 +3-Isopropyl-6-Methyl-Benzene-1,2-Diol +NSC40567 +p-Cymene-2,3-diol (7CI,8CI) +3-methyl-6-propan-2-ylbenzene-1,2-diol +SCHEMBL1494556 +1,2-Benzenediol, 3-methyl-6-(1-methylethyl)- (9CI) +DTXSID10197652 +5722AF +NSC-40567 +ZINC96035800 +AKOS006275160 +MCULE-2488475103 +3-methyl-6-propan-2-yl-benzene-1,2-diol +3-methyl-6-(propan-2-yl)benzene-1,2-diol +A828568" 95873.0 C10H14O2 166.099379688 2.198500000000001 0 0 0 BTM00007 BTM00007 Hydroxylation of benzene on carbon ortho to electron donating group BTMR1045 "CYP1A2 +CYP2C8 +CYP2C9 +CYP2C19 +CYP2D6 +CYP3A4" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +7 CC(C)C1=CC=C(C)C=C1O "Cc1ccc(C(C)CO)c(c1)O +" InChI=1S/C10H14O2/c1-7-3-4-9(8(2)6-11)10(12)5-7/h3-5,8,11-12H,6H2,1-2H3 CLJPRXFHCRIUKW-UHFFFAOYSA-N C(C(C)C1=CC=C(C)C=C1O)O "9-Hydroxythymol +61955-76-8 +2-(1-hydroxypropan-2-yl)-5-methylphenol +p-cymene-3,8-diol +p-Mentha-1,3,5-triene-3,9-diol" 14432748.0 C10H14O2 166.099379688 1.5777000000000003 0 0 0 BTM00008 BTM00008 Hydroxylation of terminal methyl BTMR1061 "CYP1A2 +CYP2A6 +CYP2B6 +CYP2C8 +CYP2C9 +CYP2C19 +CYP2D6 +CYP2E1 +CYP3A4" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +8 CC(C)C1=CC=C(C)C=C1O "CC(C)C1=C(C=C(C)C2C1O2)O +" InChI=1S/C10H14O2/c1-5(2)8-7(11)4-6(3)9-10(8)12-9/h4-5,9-11H,1-3H3 DETRTAMOFLJUIH-UHFFFAOYSA-N CC(C)C=1C2C(C(C)=CC1O)O2 C10H14O2 166.099379688 0.9412999999999996 0 0 0 BTM00009 BTM00009 Epoxidation of arene BTMR1028 "CYP1A2 +CYP2B6 +CYP2C8 +CYP2C9 +CYP2C19 +CYP2E1 +CYP3A4" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044
--- a/test-data/output2.csv Wed Jan 13 11:17:53 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,161 +0,0 @@ -,SMILES query,SMILES target,InChI,InChIKey,SMILES,Synonyms,PUBCHEM_CID,Molecular formula,Major Isotope Mass,ALogP,Lipinski_Violations,Insecticide_Likeness_Violations,Post_Em_Herbicide_Likeness_Violations,Metabolite ID,cdk:Title,Reaction,Reaction ID,Enzyme(s),Biosystem,Precursor ID,Precursor SMILES,Precursor InChI,Precursor InChIKey,Precursor ALogP,Precursor Major Isotope Mass -0,CC(C)C1=CC=C(C)C=C1O,"Cc1ccc(c(c1)O)C(C)(C)O -","InChI=1S/C10H14O2/c1-7-4-5-8(9(11)6-7)10(2,3)12/h4-6,11-12H,1-3H3",UWRRYLNXMGBJKK-UHFFFAOYSA-N,CC(C)(C1=CC=C(C)C=C1O)O,,,C10H14O2,166.099379688,2.0267999999999997,0,0,0,BTM00001,BTM00001,Hydroxylation of penultimate aliphatic carbon adjacent to aromatic carbon,BTMR1076,"CYP1A2 -CYP2C8 -CYP2C9 -CYP2C19 -CYP2D6",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -2,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1cc(c(C)cc1O)O -","InChI=1S/C10H14O2/c1-6(2)8-5-9(11)7(3)4-10(8)12/h4-6,11-12H,1-3H3",OQIOHYHRGZNZCW-UHFFFAOYSA-N,CC(C)C1=CC(=C(C)C=C1O)O,"Thymohydroquinone -Thymoquinol -Hydrothymoquinone -2217-60-9 -p-Cymene-2,5-diol -1,4-Benzenediol, 2-methyl-5-(1-methylethyl)- -2-Methyl-5-isopropylhydroquinone -Hydroquinone, 5-isopropyl-2-methyl- -NSC 34803 -UNII-1C2ICM1R8V -BRN 2084452 -1C2ICM1R8V -2-methyl-5-propan-2-ylbenzene-1,4-diol -2-methyl-5-(1-methylethyl)-1,4-benzenediol -2-ISOPROPYL-5-METHYLBENZENE-1,4-DIOL -Thymohydrochinon -Thymohydroquinone (I) -SCHEMBL69082 -p-Cymene-2,5-diol (8CI) -CHEMBL4204349 -CTK8H6569 -DTXSID70176706 -WLN: QR DQ B1 EY1&1 -NSC34803 -1, 2-methyl-5-(1-methylethyl)- -NSC-34803 -AKOS006274324 -ZINC100292063 -MCULE-6916835293 -2-isopropyl-5-methyl-benzene-1,4-diol -LS-77299 -5-methyl-2-(methylethyl)benzene-1,4-diol -2-methyl-5-(propan-2-yl)benzene-1,4-diol -FT-0700031 -ST51045581 -IMW",95779.0,C10H14O2,166.099379688,2.198500000000001,0,0,0,BTM00002,BTM00002,p-Hydroxylation of phenol,BTMR1038,"CYP1A2 -CYP2A6 -CYP2B6 -CYP2C8 -CYP2C9 -CYP2C19 -CYP2D6 -CYP2E1 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -3,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(cc1O)CO -","InChI=1S/C10H14O2/c1-7(2)9-4-3-8(6-11)5-10(9)12/h3-5,7,11-12H,6H2,1-2H3",UNNQYEJIPIBHFS-UHFFFAOYSA-N,CC(C)C1=CC=C(CO)C=C1O,"77311-68-3 -CTK2G0272 -5-hydroxymethyl-2-isopropylphenol -DTXSID70554040 -2-Isopropyl-5-(hydroxymethyl)phenol -5-(Hydroxymethyl)-2-(propan-2-yl)phenol -Benzenemethanol, 3-hydroxy-4-(1-methylethyl)-",14002478.0,C10H14O2,166.099379688,1.3752000000000006,0,0,0,BTM00003,BTM00003,Aliphatic hydroxylation of methyl carbon adjacent to aromatic ring,BTMR1058,"CYP1A2 -CYP2A6 -CYP2B6 -CYP2C8 -CYP2C9 -CYP2C19 -CYP2D6 -CYP2E1 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -4,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1c(cc(C)cc1O)O -","InChI=1S/C10H14O2/c1-6(2)10-8(11)4-7(3)5-9(10)12/h4-6,11-12H,1-3H3",TUWRZVAMHVWRER-UHFFFAOYSA-N,CC(C)C1=C(C=C(C)C=C1O)O,"SCHEMBL1494319 -5-methyl-2-propan-2-ylbenzene-1,3-diol",12310887.0,C10H14O2,166.099379688,2.198500000000001,0,0,0,BTM00004,BTM00004,Hydroxylation of benzene on carbon ortho to electron donating group,BTMR1045,"CYP1A2 -CYP2C8 -CYP2C9 -CYP2C19 -CYP2D6 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -5,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(C)c(c1O)O -","InChI=1S/C10H14O2/c1-6(2)8-5-4-7(3)9(11)10(8)12/h4-6,11-12H,1-3H3",LYUBXLHGANLIMX-UHFFFAOYSA-N,CC(C)C1=CC=C(C)C(=C1O)O,"Cymopyrocatechol -p-cymene-2,3-diol -490-06-2 -3-Isopropyl-6-methylpyrocatechol -NSC 40567 -Pyrocatechol, 2-isopropyl-6-methyl- -BRN 2248022 -UNII-93XFQ715UL -3-methyl-6-propan-2-ylbenzene-1,2-diol -93XFQ715UL -p-Cymene-2,3-diol (7CI,8CI) -3-isopropyl-6-methylbenzene-1,2-diol -3-Isopropyl-6-Methyl-Benzene-1,2-Diol -NSC40567 -SCHEMBL1494556 -1,2-Benzenediol, 3-methyl-6-(1-methylethyl)- (9CI) -DTXSID10197652 -5722AF -NSC-40567 -ZINC96035800 -AKOS006275160 -MCULE-2488475103 -3-methyl-6-propan-2-yl-benzene-1,2-diol -LS-136440 -3-methyl-6-(propan-2-yl)benzene-1,2-diol -1,2-Benzenediol, 3-methyl-6-(1-methylethyl)- -A828568",95873.0,C10H14O2,166.099379688,2.198500000000001,0,0,0,BTM00005,BTM00005,Hydroxylation of benzene on carbon ortho to electron donating group,BTMR1045,"CYP1A2 -CYP2C8 -CYP2C9 -CYP2C19 -CYP2D6 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -6,CC(C)C1=CC=C(C)C=C1O,"Cc1ccc(C(C)CO)c(c1)O -","InChI=1S/C10H14O2/c1-7-3-4-9(8(2)6-11)10(12)5-7/h3-5,8,11-12H,6H2,1-2H3",CLJPRXFHCRIUKW-UHFFFAOYSA-N,C(C(C)C1=CC=C(C)C=C1O)O,"9-Hydroxythymol -61955-76-8 -p-cymene-3,8-diol -p-Mentha-1,3,5-triene-3,9-diol",14432748.0,C10H14O2,166.099379688,1.5777000000000003,0,0,0,BTM00006,BTM00006,Hydroxylation of terminal methyl,BTMR1061,"CYP1A2 -CYP2A6 -CYP2B6 -CYP2C8 -CYP2C9 -CYP2C19 -CYP2D6 -CYP2E1 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -7,CC(C)C1=CC=C(C)C=C1O,"C=C(C)c1ccc(C)cc1O -","InChI=1S/C10H12O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-6,11H,1H2,2-3H3",IHWFPRKZRRGTTI-UHFFFAOYSA-N,CC(=C)C1=CC=C(C)C=C1O,"8,9-Dehydrothymol -5-methyl-2-prop-1-en-2-ylphenol -2-isopropenyl-5-methylphenol -18612-99-2 -Phenol, 5-methyl-2-(1-methylethenyl)- -p-Cymen-8-en-3-ol -m-Cresol, 6-isopropenyl- -SCHEMBL686122 -2-Isopropenyl-5-methyl-phenol -CTK0E2283 -DTXSID60423892 -5-Methyl-2-(1-methylethenyl)phenol -5-Methyl-2-(prop-1-en-2-yl)phenol",6429037.0,C10H12O,148.088815004,3.0469000000000004,0,0,0,BTM00007,BTM00007,Terminal desaturation,BTMR1190,"CYP1A2 -CYP2A6 -CYP2C9 -CYP2D6 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -9,CC(C)C1=CC=C(C)C=C1O,"CC(C)C1=C(C=C(C)C2C1O2)O -","InChI=1S/C10H14O2/c1-5(2)8-7(11)4-6(3)9-10(8)12-9/h4-5,9-11H,1-3H3",DETRTAMOFLJUIH-UHFFFAOYSA-N,CC(C)C=1C2C(C(C)=CC1O)O2,,,C10H14O2,166.099379688,0.9412999999999996,0,0,0,BTM00008,BTM00008,Epoxidation of arene,BTMR1028,"CYP1A2 -CYP2B6 -CYP2C8 -CYP2C9 -CYP2C19 -CYP2E1 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -13,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(C)cc1OC1C(C(C(C(C(=O)O)O1)O)O)O -","InChI=1S/C16H22O7/c1-7(2)9-5-4-8(3)6-10(9)22-16-13(19)11(17)12(18)14(23-16)15(20)21/h4-7,11-14,16-19H,1-3H3,(H,20,21)",ADQJSAVCKZSGMK-UHFFFAOYSA-N,CC(C)C1=CC=C(C)C=C1OC2OC(C(O)C(O)C2O)C(O)=O,"NSC404789 -NSC-404789 -3,5-trihydroxy-6-(2-isopropyl-5-methyl-phenoxy)tetrahydro-2H-pyran-2-carboxylic acid",346530.0,C16H22O7,326.13655304400004,0.6636999999999996,0,1,1,BTM00009,BTM00009,Aromatic OH-glucuronidation,BTMR0166,Bacterial UDP-glucuronosyltransferase,GUTMICRO,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -15,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(C)cc1OS(=O)(=O)O -","InChI=1S/C10H14O4S/c1-7(2)9-5-4-8(3)6-10(9)14-15(11,12)13/h4-7H,1-3H3,(H,11,12,13)",NODSEPOUFZPJEQ-UHFFFAOYSA-N,CC(C)C1=CC=C(C)C=C1OS(O)(=O)=O,"thymol sulfate -Thymol sulphate -Thymol sulfuric acid -Thymol sulphuric acid -SCHEMBL235717 -CHEBI:82911 -5-methyl-2-(propan-2-yl)phenyl hydrogen sulfate -Q27156452",12456386.0,C10H14O4S,230.061279928,2.0731,0,0,0,BTM00010,BTM00010,3-OH-Sulfonation of phenolic compound,BTMR0196,SULFOTRANSFERASE,GUTMICRO,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output2.tsv Mon Apr 11 10:09:39 2022 +0000 @@ -0,0 +1,137 @@ + SMILES query SMILES target InChI InChIKey SMILES Synonyms PUBCHEM_CID Molecular formula Major Isotope Mass ALogP Lipinski_Violations Insecticide_Likeness_Violations Post_Em_Herbicide_Likeness_Violations Metabolite ID cdk:Title Reaction Reaction ID Enzyme(s) Biosystem Precursor ID Precursor SMILES Precursor InChI Precursor InChIKey Precursor ALogP Precursor Major Isotope Mass +0 CC(C)C1=CC=C(C)C=C1O "CC(C)c1ccc(C)cc1OC1C(C(C(C(C(=O)O)O1)O)O)O +" InChI=1S/C16H22O7/c1-7(2)9-5-4-8(3)6-10(9)22-16-13(19)11(17)12(18)14(23-16)15(20)21/h4-7,11-14,16-19H,1-3H3,(H,20,21) ADQJSAVCKZSGMK-UHFFFAOYSA-N CC(C)C1=CC=C(C)C=C1OC2OC(C(O)C(O)C2O)C(O)=O "NSC404789 +NSC-404789 +(2S,3S,4S,5R)-3,4,5-trihydroxy-6-(5-methyl-2-propan-2-ylphenoxy)oxane-2-carboxylic acid +3,5-trihydroxy-6-(2-isopropyl-5-methyl-phenoxy)tetrahydro-2H-pyran-2-carboxylic acid" 346530.0 C16H22O7 326.13655304400004 0.6636999999999996 0 1 1 BTM00001 BTM00001 Aromatic OH-glucuronidation BTMR0166 EC 2.4.1.17 HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +1 CC(C)C1=CC=C(C)C=C1O "CC(C)c1ccc(C)cc1OS(=O)(=O)O +" InChI=1S/C10H14O4S/c1-7(2)9-5-4-8(3)6-10(9)14-15(11,12)13/h4-7H,1-3H3,(H,11,12,13) NODSEPOUFZPJEQ-UHFFFAOYSA-N CC(C)C1=CC=C(C)C=C1OS(O)(=O)=O "thymol sulfate +Thymol sulphate +Thymol sulfuric acid +Thymol sulphuric acid +SCHEMBL235717 +CHEBI:82911 +5-methyl-2-(propan-2-yl)phenyl hydrogen sulfate +Q27156452" 12456386.0 C10H14O4S 230.061279928 2.0731 0 0 0 BTM00002 BTM00002 3-OH-Sulfonation of phenolic compound BTMR0196 EC 2.8.2.1 HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +2 CC(C)C1=CC=C(C)C=C1O "Cc1ccc(c(c1)O)C(C)(C)O +" InChI=1S/C10H14O2/c1-7-4-5-8(9(11)6-7)10(2,3)12/h4-6,11-12H,1-3H3 UWRRYLNXMGBJKK-UHFFFAOYSA-N CC(C)(C1=CC=C(C)C=C1O)O SCHEMBL22652590 11332674.0 C10H14O2 166.099379688 2.0267999999999997 0 0 0 BTM00003 BTM00003 Hydroxylation of penultimate aliphatic carbon adjacent to aromatic carbon BTMR1076 "CYP1A2 +CYP2C8 +CYP2C9 +CYP2C19 +CYP2D6" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +3 CC(C)C1=CC=C(C)C=C1O "CC(C)c1cc(c(C)cc1O)O +" InChI=1S/C10H14O2/c1-6(2)8-5-9(11)7(3)4-10(8)12/h4-6,11-12H,1-3H3 OQIOHYHRGZNZCW-UHFFFAOYSA-N CC(C)C1=CC(=C(C)C=C1O)O "Thymohydroquinone +Thymoquinol +Hydrothymoquinone +2217-60-9 +1,4-Benzenediol, 2-methyl-5-(1-methylethyl)- +p-Cymene-2,5-diol +2-Methyl-5-isopropylhydroquinone +Hydroquinone, 5-isopropyl-2-methyl- +NSC 34803 +2-ISOPROPYL-5-METHYLBENZENE-1,4-DIOL +UNII-1C2ICM1R8V +1C2ICM1R8V +2-methyl-5-(1-methylethyl)-1,4-benzenediol +BRN 2084452 +Thymohydrochinon +Thymohydroquinone (I) +2-methyl-5-propan-2-ylbenzene-1,4-diol +SCHEMBL69082 +p-Cymene-2,5-diol (8CI) +CHEMBL4204349 +DTXSID70176706 +WLN: QR DQ B1 EY1&1 +NSC34803 +1, 2-methyl-5-(1-methylethyl)- +NSC-34803 +AKOS006274324 +ZINC100292063 +MCULE-6916835293 +2-isopropyl-5-methyl-benzene-1,4-diol +2-methyl-5-(propan-2-yl)benzene-1,4-diol +FT-0700031 +IMW" 95779.0 C10H14O2 166.099379688 2.198500000000001 0 0 0 BTM00004 BTM00004 p-Hydroxylation of phenol BTMR1038 "CYP1A2 +CYP2A6 +CYP2B6 +CYP2C8 +CYP2C9 +CYP2C19 +CYP2D6 +CYP2E1 +CYP3A4" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +4 CC(C)C1=CC=C(C)C=C1O "CC(C)c1ccc(cc1O)CO +" InChI=1S/C10H14O2/c1-7(2)9-4-3-8(6-11)5-10(9)12/h3-5,7,11-12H,6H2,1-2H3 UNNQYEJIPIBHFS-UHFFFAOYSA-N CC(C)C1=CC=C(CO)C=C1O "77311-68-3 +5-hydroxymethyl-2-isopropylphenol +DTXSID70554040 +2-Isopropyl-5-(hydroxymethyl)phenol +5-(Hydroxymethyl)-2-(propan-2-yl)phenol" 14002478.0 C10H14O2 166.099379688 1.3752000000000006 0 0 0 BTM00005 BTM00005 Aliphatic hydroxylation of methyl carbon adjacent to aromatic ring BTMR1058 "CYP1A2 +CYP2A6 +CYP2B6 +CYP2C8 +CYP2C9 +CYP2C19 +CYP2D6 +CYP2E1 +CYP3A4" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +5 CC(C)C1=CC=C(C)C=C1O "CC(C)c1c(cc(C)cc1O)O +" InChI=1S/C10H14O2/c1-6(2)10-8(11)4-7(3)5-9(10)12/h4-6,11-12H,1-3H3 TUWRZVAMHVWRER-UHFFFAOYSA-N CC(C)C1=C(C=C(C)C=C1O)O SCHEMBL1494319 12310887.0 C10H14O2 166.099379688 2.198500000000001 0 0 0 BTM00006 BTM00006 Hydroxylation of benzene on carbon ortho to electron donating group BTMR1045 "CYP1A2 +CYP2C8 +CYP2C9 +CYP2C19 +CYP2D6 +CYP3A4" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +6 CC(C)C1=CC=C(C)C=C1O "CC(C)c1ccc(C)c(c1O)O +" InChI=1S/C10H14O2/c1-6(2)8-5-4-7(3)9(11)10(8)12/h4-6,11-12H,1-3H3 LYUBXLHGANLIMX-UHFFFAOYSA-N CC(C)C1=CC=C(C)C(=C1O)O "Cymopyrocatechol +490-06-2 +3-isopropyl-6-methylcatechol +p-cymene-2,3-diol +3-Isopropyl-6-methylpyrocatechol +Pyrocatechol, 2-isopropyl-6-methyl- +UNII-93XFQ715UL +93XFQ715UL +NSC 40567 +3-isopropyl-6-methylbenzene-1,2-diol +BRN 2248022 +3-Isopropyl-6-Methyl-Benzene-1,2-Diol +NSC40567 +p-Cymene-2,3-diol (7CI,8CI) +3-methyl-6-propan-2-ylbenzene-1,2-diol +SCHEMBL1494556 +1,2-Benzenediol, 3-methyl-6-(1-methylethyl)- (9CI) +DTXSID10197652 +5722AF +NSC-40567 +ZINC96035800 +AKOS006275160 +MCULE-2488475103 +3-methyl-6-propan-2-yl-benzene-1,2-diol +3-methyl-6-(propan-2-yl)benzene-1,2-diol +A828568" 95873.0 C10H14O2 166.099379688 2.198500000000001 0 0 0 BTM00007 BTM00007 Hydroxylation of benzene on carbon ortho to electron donating group BTMR1045 "CYP1A2 +CYP2C8 +CYP2C9 +CYP2C19 +CYP2D6 +CYP3A4" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +7 CC(C)C1=CC=C(C)C=C1O "Cc1ccc(C(C)CO)c(c1)O +" InChI=1S/C10H14O2/c1-7-3-4-9(8(2)6-11)10(12)5-7/h3-5,8,11-12H,6H2,1-2H3 CLJPRXFHCRIUKW-UHFFFAOYSA-N C(C(C)C1=CC=C(C)C=C1O)O "9-Hydroxythymol +61955-76-8 +2-(1-hydroxypropan-2-yl)-5-methylphenol +p-cymene-3,8-diol +p-Mentha-1,3,5-triene-3,9-diol" 14432748.0 C10H14O2 166.099379688 1.5777000000000003 0 0 0 BTM00008 BTM00008 Hydroxylation of terminal methyl BTMR1061 "CYP1A2 +CYP2A6 +CYP2B6 +CYP2C8 +CYP2C9 +CYP2C19 +CYP2D6 +CYP2E1 +CYP3A4" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +8 CC(C)C1=CC=C(C)C=C1O "CC(C)C1=C(C=C(C)C2C1O2)O +" InChI=1S/C10H14O2/c1-5(2)8-7(11)4-6(3)9-10(8)12-9/h4-5,9-11H,1-3H3 DETRTAMOFLJUIH-UHFFFAOYSA-N CC(C)C=1C2C(C(C)=CC1O)O2 C10H14O2 166.099379688 0.9412999999999996 0 0 0 BTM00009 BTM00009 Epoxidation of arene BTMR1028 "CYP1A2 +CYP2B6 +CYP2C8 +CYP2C9 +CYP2C19 +CYP2E1 +CYP3A4" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044
--- a/test-data/output3.csv Wed Jan 13 11:17:53 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,120 +0,0 @@ -,SMILES query,SMILES target,InChI,InChIKey,SMILES,Synonyms,PUBCHEM_CID,Molecular formula,Major Isotope Mass,ALogP,Lipinski_Violations,Insecticide_Likeness_Violations,Post_Em_Herbicide_Likeness_Violations,Metabolite ID,cdk:Title,Reaction,Reaction ID,Enzyme(s),Biosystem,Precursor ID,Precursor SMILES,Precursor InChI,Precursor InChIKey,Precursor ALogP,Precursor Major Isotope Mass -0,CC(C)C1=CC=C(C)C=C1O,"Cc1ccc(c(c1)O)C(C)(C)O -","InChI=1S/C10H14O2/c1-7-4-5-8(9(11)6-7)10(2,3)12/h4-6,11-12H,1-3H3",UWRRYLNXMGBJKK-UHFFFAOYSA-N,CC(C)(C1=CC=C(C)C=C1O)O,,,C10H14O2,166.099379688,2.0267999999999997,0,0,0,BTM00001,BTM00001,Hydroxylation of penultimate aliphatic carbon adjacent to aromatic carbon,BTMR1076,"CYP1A2 -CYP2C8 -CYP2C9 -CYP2C19 -CYP2D6",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -2,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1cc(c(C)cc1O)O -","InChI=1S/C10H14O2/c1-6(2)8-5-9(11)7(3)4-10(8)12/h4-6,11-12H,1-3H3",OQIOHYHRGZNZCW-UHFFFAOYSA-N,CC(C)C1=CC(=C(C)C=C1O)O,"Thymohydroquinone -Thymoquinol -Hydrothymoquinone -2217-60-9 -p-Cymene-2,5-diol -1,4-Benzenediol, 2-methyl-5-(1-methylethyl)- -2-Methyl-5-isopropylhydroquinone -Hydroquinone, 5-isopropyl-2-methyl- -NSC 34803 -UNII-1C2ICM1R8V -BRN 2084452 -1C2ICM1R8V -2-methyl-5-propan-2-ylbenzene-1,4-diol -2-methyl-5-(1-methylethyl)-1,4-benzenediol -2-ISOPROPYL-5-METHYLBENZENE-1,4-DIOL -Thymohydrochinon -Thymohydroquinone (I) -SCHEMBL69082 -p-Cymene-2,5-diol (8CI) -CHEMBL4204349 -CTK8H6569 -DTXSID70176706 -WLN: QR DQ B1 EY1&1 -NSC34803 -1, 2-methyl-5-(1-methylethyl)- -NSC-34803 -AKOS006274324 -ZINC100292063 -MCULE-6916835293 -2-isopropyl-5-methyl-benzene-1,4-diol -LS-77299 -5-methyl-2-(methylethyl)benzene-1,4-diol -2-methyl-5-(propan-2-yl)benzene-1,4-diol -FT-0700031 -ST51045581 -IMW",95779.0,C10H14O2,166.099379688,2.198500000000001,0,0,0,BTM00002,BTM00002,p-Hydroxylation of phenol,BTMR1038,"CYP1A2 -CYP2A6 -CYP2B6 -CYP2C8 -CYP2C9 -CYP2C19 -CYP2D6 -CYP2E1 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -3,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(cc1O)CO -","InChI=1S/C10H14O2/c1-7(2)9-4-3-8(6-11)5-10(9)12/h3-5,7,11-12H,6H2,1-2H3",UNNQYEJIPIBHFS-UHFFFAOYSA-N,CC(C)C1=CC=C(CO)C=C1O,"77311-68-3 -CTK2G0272 -5-hydroxymethyl-2-isopropylphenol -DTXSID70554040 -2-Isopropyl-5-(hydroxymethyl)phenol -5-(Hydroxymethyl)-2-(propan-2-yl)phenol -Benzenemethanol, 3-hydroxy-4-(1-methylethyl)-",14002478.0,C10H14O2,166.099379688,1.3752000000000006,0,0,0,BTM00003,BTM00003,Aliphatic hydroxylation of methyl carbon adjacent to aromatic ring,BTMR1058,"CYP1A2 -CYP2A6 -CYP2B6 -CYP2C8 -CYP2C9 -CYP2C19 -CYP2D6 -CYP2E1 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -6,CC(C)C1=CC=C(C)C=C1O,"Cc1ccc(C(C)CO)c(c1)O -","InChI=1S/C10H14O2/c1-7-3-4-9(8(2)6-11)10(12)5-7/h3-5,8,11-12H,6H2,1-2H3",CLJPRXFHCRIUKW-UHFFFAOYSA-N,C(C(C)C1=CC=C(C)C=C1O)O,"9-Hydroxythymol -61955-76-8 -p-cymene-3,8-diol -p-Mentha-1,3,5-triene-3,9-diol",14432748.0,C10H14O2,166.099379688,1.5777000000000003,0,0,0,BTM00006,BTM00006,Hydroxylation of terminal methyl,BTMR1061,"CYP1A2 -CYP2A6 -CYP2B6 -CYP2C8 -CYP2C9 -CYP2C19 -CYP2D6 -CYP2E1 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -7,CC(C)C1=CC=C(C)C=C1O,"C=C(C)c1ccc(C)cc1O -","InChI=1S/C10H12O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-6,11H,1H2,2-3H3",IHWFPRKZRRGTTI-UHFFFAOYSA-N,CC(=C)C1=CC=C(C)C=C1O,"8,9-Dehydrothymol -5-methyl-2-prop-1-en-2-ylphenol -2-isopropenyl-5-methylphenol -18612-99-2 -Phenol, 5-methyl-2-(1-methylethenyl)- -p-Cymen-8-en-3-ol -m-Cresol, 6-isopropenyl- -SCHEMBL686122 -2-Isopropenyl-5-methyl-phenol -CTK0E2283 -DTXSID60423892 -5-Methyl-2-(1-methylethenyl)phenol -5-Methyl-2-(prop-1-en-2-yl)phenol",6429037.0,C10H12O,148.088815004,3.0469000000000004,0,0,0,BTM00007,BTM00007,Terminal desaturation,BTMR1190,"CYP1A2 -CYP2A6 -CYP2C9 -CYP2D6 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -9,CC(C)C1=CC=C(C)C=C1O,"CC(C)C1=C(C=C(C)C2C1O2)O -","InChI=1S/C10H14O2/c1-5(2)8-7(11)4-6(3)9-10(8)12-9/h4-5,9-11H,1-3H3",DETRTAMOFLJUIH-UHFFFAOYSA-N,CC(C)C=1C2C(C(C)=CC1O)O2,,,C10H14O2,166.099379688,0.9412999999999996,0,0,0,BTM00008,BTM00008,Epoxidation of arene,BTMR1028,"CYP1A2 -CYP2B6 -CYP2C8 -CYP2C9 -CYP2C19 -CYP2E1 -CYP3A4",HUMAN,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -13,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(C)cc1OC1C(C(C(C(C(=O)O)O1)O)O)O -","InChI=1S/C16H22O7/c1-7(2)9-5-4-8(3)6-10(9)22-16-13(19)11(17)12(18)14(23-16)15(20)21/h4-7,11-14,16-19H,1-3H3,(H,20,21)",ADQJSAVCKZSGMK-UHFFFAOYSA-N,CC(C)C1=CC=C(C)C=C1OC2OC(C(O)C(O)C2O)C(O)=O,"NSC404789 -NSC-404789 -3,5-trihydroxy-6-(2-isopropyl-5-methyl-phenoxy)tetrahydro-2H-pyran-2-carboxylic acid",346530.0,C16H22O7,326.13655304400004,0.6636999999999996,0,1,1,BTM00009,BTM00009,Aromatic OH-glucuronidation,BTMR0166,Bacterial UDP-glucuronosyltransferase,GUTMICRO,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044 -15,CC(C)C1=CC=C(C)C=C1O,"CC(C)c1ccc(C)cc1OS(=O)(=O)O -","InChI=1S/C10H14O4S/c1-7(2)9-5-4-8(3)6-10(9)14-15(11,12)13/h4-7H,1-3H3,(H,11,12,13)",NODSEPOUFZPJEQ-UHFFFAOYSA-N,CC(C)C1=CC=C(C)C=C1OS(O)(=O)=O,"thymol sulfate -Thymol sulphate -Thymol sulfuric acid -Thymol sulphuric acid -SCHEMBL235717 -CHEBI:82911 -5-methyl-2-(propan-2-yl)phenyl hydrogen sulfate -Q27156452",12456386.0,C10H14O4S,230.061279928,2.0731,0,0,0,BTM00010,BTM00010,3-OH-Sulfonation of phenolic compound,BTMR0196,SULFOTRANSFERASE,GUTMICRO,,CC(C)C1=CC=C(C)C=C1O,"InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3",MGSRCZKZVOBKFT-UHFFFAOYSA-N,,150.1044
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output3.tsv Mon Apr 11 10:09:39 2022 +0000 @@ -0,0 +1,98 @@ + SMILES query SMILES target InChI InChIKey SMILES Synonyms PUBCHEM_CID Molecular formula Major Isotope Mass ALogP Lipinski_Violations Insecticide_Likeness_Violations Post_Em_Herbicide_Likeness_Violations Metabolite ID cdk:Title Reaction Reaction ID Enzyme(s) Biosystem Precursor ID Precursor SMILES Precursor InChI Precursor InChIKey Precursor ALogP Precursor Major Isotope Mass +0 CC(C)C1=CC=C(C)C=C1O "CC(C)c1ccc(C)cc1OC1C(C(C(C(C(=O)O)O1)O)O)O +" InChI=1S/C16H22O7/c1-7(2)9-5-4-8(3)6-10(9)22-16-13(19)11(17)12(18)14(23-16)15(20)21/h4-7,11-14,16-19H,1-3H3,(H,20,21) ADQJSAVCKZSGMK-UHFFFAOYSA-N CC(C)C1=CC=C(C)C=C1OC2OC(C(O)C(O)C2O)C(O)=O "NSC404789 +NSC-404789 +(2S,3S,4S,5R)-3,4,5-trihydroxy-6-(5-methyl-2-propan-2-ylphenoxy)oxane-2-carboxylic acid +3,5-trihydroxy-6-(2-isopropyl-5-methyl-phenoxy)tetrahydro-2H-pyran-2-carboxylic acid" 346530.0 C16H22O7 326.13655304400004 0.6636999999999996 0 1 1 BTM00001 BTM00001 Aromatic OH-glucuronidation BTMR0166 EC 2.4.1.17 HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +1 CC(C)C1=CC=C(C)C=C1O "CC(C)c1ccc(C)cc1OS(=O)(=O)O +" InChI=1S/C10H14O4S/c1-7(2)9-5-4-8(3)6-10(9)14-15(11,12)13/h4-7H,1-3H3,(H,11,12,13) NODSEPOUFZPJEQ-UHFFFAOYSA-N CC(C)C1=CC=C(C)C=C1OS(O)(=O)=O "thymol sulfate +Thymol sulphate +Thymol sulfuric acid +Thymol sulphuric acid +SCHEMBL235717 +CHEBI:82911 +5-methyl-2-(propan-2-yl)phenyl hydrogen sulfate +Q27156452" 12456386.0 C10H14O4S 230.061279928 2.0731 0 0 0 BTM00002 BTM00002 3-OH-Sulfonation of phenolic compound BTMR0196 EC 2.8.2.1 HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +2 CC(C)C1=CC=C(C)C=C1O "Cc1ccc(c(c1)O)C(C)(C)O +" InChI=1S/C10H14O2/c1-7-4-5-8(9(11)6-7)10(2,3)12/h4-6,11-12H,1-3H3 UWRRYLNXMGBJKK-UHFFFAOYSA-N CC(C)(C1=CC=C(C)C=C1O)O SCHEMBL22652590 11332674.0 C10H14O2 166.099379688 2.0267999999999997 0 0 0 BTM00003 BTM00003 Hydroxylation of penultimate aliphatic carbon adjacent to aromatic carbon BTMR1076 "CYP1A2 +CYP2C8 +CYP2C9 +CYP2C19 +CYP2D6" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +3 CC(C)C1=CC=C(C)C=C1O "CC(C)c1cc(c(C)cc1O)O +" InChI=1S/C10H14O2/c1-6(2)8-5-9(11)7(3)4-10(8)12/h4-6,11-12H,1-3H3 OQIOHYHRGZNZCW-UHFFFAOYSA-N CC(C)C1=CC(=C(C)C=C1O)O "Thymohydroquinone +Thymoquinol +Hydrothymoquinone +2217-60-9 +1,4-Benzenediol, 2-methyl-5-(1-methylethyl)- +p-Cymene-2,5-diol +2-Methyl-5-isopropylhydroquinone +Hydroquinone, 5-isopropyl-2-methyl- +NSC 34803 +2-ISOPROPYL-5-METHYLBENZENE-1,4-DIOL +UNII-1C2ICM1R8V +1C2ICM1R8V +2-methyl-5-(1-methylethyl)-1,4-benzenediol +BRN 2084452 +Thymohydrochinon +Thymohydroquinone (I) +2-methyl-5-propan-2-ylbenzene-1,4-diol +SCHEMBL69082 +p-Cymene-2,5-diol (8CI) +CHEMBL4204349 +DTXSID70176706 +WLN: QR DQ B1 EY1&1 +NSC34803 +1, 2-methyl-5-(1-methylethyl)- +NSC-34803 +AKOS006274324 +ZINC100292063 +MCULE-6916835293 +2-isopropyl-5-methyl-benzene-1,4-diol +2-methyl-5-(propan-2-yl)benzene-1,4-diol +FT-0700031 +IMW" 95779.0 C10H14O2 166.099379688 2.198500000000001 0 0 0 BTM00004 BTM00004 p-Hydroxylation of phenol BTMR1038 "CYP1A2 +CYP2A6 +CYP2B6 +CYP2C8 +CYP2C9 +CYP2C19 +CYP2D6 +CYP2E1 +CYP3A4" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +4 CC(C)C1=CC=C(C)C=C1O "CC(C)c1ccc(cc1O)CO +" InChI=1S/C10H14O2/c1-7(2)9-4-3-8(6-11)5-10(9)12/h3-5,7,11-12H,6H2,1-2H3 UNNQYEJIPIBHFS-UHFFFAOYSA-N CC(C)C1=CC=C(CO)C=C1O "77311-68-3 +5-hydroxymethyl-2-isopropylphenol +DTXSID70554040 +2-Isopropyl-5-(hydroxymethyl)phenol +5-(Hydroxymethyl)-2-(propan-2-yl)phenol" 14002478.0 C10H14O2 166.099379688 1.3752000000000006 0 0 0 BTM00005 BTM00005 Aliphatic hydroxylation of methyl carbon adjacent to aromatic ring BTMR1058 "CYP1A2 +CYP2A6 +CYP2B6 +CYP2C8 +CYP2C9 +CYP2C19 +CYP2D6 +CYP2E1 +CYP3A4" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +7 CC(C)C1=CC=C(C)C=C1O "Cc1ccc(C(C)CO)c(c1)O +" InChI=1S/C10H14O2/c1-7-3-4-9(8(2)6-11)10(12)5-7/h3-5,8,11-12H,6H2,1-2H3 CLJPRXFHCRIUKW-UHFFFAOYSA-N C(C(C)C1=CC=C(C)C=C1O)O "9-Hydroxythymol +61955-76-8 +2-(1-hydroxypropan-2-yl)-5-methylphenol +p-cymene-3,8-diol +p-Mentha-1,3,5-triene-3,9-diol" 14432748.0 C10H14O2 166.099379688 1.5777000000000003 0 0 0 BTM00008 BTM00008 Hydroxylation of terminal methyl BTMR1061 "CYP1A2 +CYP2A6 +CYP2B6 +CYP2C8 +CYP2C9 +CYP2C19 +CYP2D6 +CYP2E1 +CYP3A4" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044 +8 CC(C)C1=CC=C(C)C=C1O "CC(C)C1=C(C=C(C)C2C1O2)O +" InChI=1S/C10H14O2/c1-5(2)8-7(11)4-6(3)9-10(8)12-9/h4-5,9-11H,1-3H3 DETRTAMOFLJUIH-UHFFFAOYSA-N CC(C)C=1C2C(C(C)=CC1O)O2 C10H14O2 166.099379688 0.9412999999999996 0 0 0 BTM00009 BTM00009 Epoxidation of arene BTMR1028 "CYP1A2 +CYP2B6 +CYP2C8 +CYP2C9 +CYP2C19 +CYP2E1 +CYP3A4" HUMAN NSC404789 CC(C)C1=CC=C(C)C=C1O InChI=1S/C10H14O/c1-7(2)9-5-4-8(3)6-10(9)11/h4-7,11H,1-3H3 MGSRCZKZVOBKFT-UHFFFAOYSA-N 150.1044
--- a/wrapper_biotransformer.py Wed Jan 13 11:17:53 2021 +0000 +++ b/wrapper_biotransformer.py Mon Apr 11 10:09:39 2022 +0000 @@ -8,8 +8,8 @@ openbabel.obErrorLog.StopLogging() -# function for translating inchi to smiles def InchiToSmiles(df): + '''Translate inchi to smiles''' sm = [] for item in df['InChI']: tmp = pybel.readstring("inchi", item) @@ -18,80 +18,58 @@ executable = ["biotransformer"] -# executable_r = ["Rscript", "inchi_to_smiles.r"] argv = sys.argv[1:] -if "-icsv" in argv: - icsv = argv.pop(argv.index("-icsv") + 1) - argv.remove("-icsv") +icsv = argv.pop(argv.index("-icsv") + 1) +argv.remove("-icsv") +ocsv = argv.pop(argv.index("-ocsv") + 1) +argv.remove("-ocsv") +ocsv_dup = argv.pop(argv.index("-ocsvDup") + 1) +argv.remove("-ocsvDup") +ocsv_dup2 = argv.pop(argv.index("-ocsvDup2") + 1) +argv.remove("-ocsvDup2") - if "-ocsv" not in argv: - sys.stderr.write("excpected -ocsv parameter\n") - sys.exit(1) - ocsv = argv.pop(argv.index("-ocsv") + 1) - argv.remove("-ocsv") - ocsv_dup = argv.pop(argv.index("-ocsvDup") + 1) - argv.remove("-ocsvDup") - ocsv_dup2 = argv.pop(argv.index("-ocsvDup2") + 1) - argv.remove("-ocsvDup2") +in_df = pandas.read_csv(icsv, header=None) +out_df1 = pandas.DataFrame() # all results +out_df2 = pandas.DataFrame() # filtered results based on 6 columns +out_df3 = pandas.DataFrame() # filtered results based on 3 columns - in_df = pandas.read_csv(icsv, header=None) - out_df1 = pandas.DataFrame() # all results - out_df2 = pandas.DataFrame() # filtered results based on 6 columns - out_df3 = pandas.DataFrame() # filtered results based on 3 columns - - tmp2 = pandas.DataFrame() - tmp3 = pandas.DataFrame() +smList1 = [] # list with smiles string +smList2 = [] +smList3 = [] +for _, (smiles,) in in_df.iterrows(): + with tempfile.NamedTemporaryFile() as out: + print("Working on compound: " + smiles) + if not re.search(r'\.', smiles): + subprocess.run(executable + argv + ["-ismi", smiles] + ["-ocsv", out.name]) + try: + bio_out = pandas.read_csv(out.name) + tmp2 = bio_out.drop_duplicates(subset=["InChI", "InChIKey", "Synonyms", "Molecular formula", "Major Isotope Mass", "ALogP"]) + tmp3 = bio_out.drop_duplicates(subset=["Molecular formula", "Major Isotope Mass", "ALogP"]) - smList1 = [] # list with smiles string - smList2 = [] - smList3 = [] - for _, (smiles,) in in_df.iterrows(): - with tempfile.NamedTemporaryFile() as out: - print("Working on compound: " + smiles) - if not re.search(r'\.', smiles): - subprocess.run(executable + argv + ["-ismi", smiles] + ["-ocsv", out.name]) - try: - tmp2 = pandas.read_csv(out.name) - tmp3 = pandas.read_csv(out.name) - tmp2.drop_duplicates(inplace=True, subset=["InChI", "InChIKey", "Synonyms", "Molecular formula", "Major Isotope Mass", "ALogP"]) - tmp3.drop_duplicates(inplace=True, subset=["Molecular formula", "Major Isotope Mass", "ALogP"]) - smList2.append([smiles] * tmp2.shape[0]) - smList3.append([smiles] * tmp3.shape[0]) - out_df1 = pandas.concat([out_df1, pandas.read_csv(out.name)]) - out_df2 = pandas.concat([out_df2, tmp2]) - out_df3 = pandas.concat([out_df3, tmp3]) - smList1.append([smiles] * pandas.read_csv(out.name).shape[0]) - except pandas.errors.EmptyDataError: - continue - else: - print("ERROR: Input compound cannot be a mixture.") - smList1 = sum(smList1, []) # merge sublists into one list - smList2 = sum(smList2, []) - smList3 = sum(smList3, []) + smList1.append([smiles] * bio_out.shape[0]) + smList2.append([smiles] * tmp2.shape[0]) + smList3.append([smiles] * tmp3.shape[0]) - out_df1.insert(0, "SMILES query", smList1) - out_df1.drop_duplicates(inplace=True) - out_df1.insert(1, "SMILES target", InchiToSmiles(out_df1)) - out_df1.to_csv(ocsv) + out_df1 = pandas.concat([out_df1, bio_out]) + out_df2 = pandas.concat([out_df2, tmp2]) + out_df3 = pandas.concat([out_df3, tmp3]) + except pandas.errors.EmptyDataError: + continue + else: + print("ERROR: Input compound cannot be a mixture.") +smList1 = sum(smList1, []) # merge sublists into one list +smList2 = sum(smList2, []) +smList3 = sum(smList3, []) - out_df2.insert(0, "SMILES query", smList2) - out_df3.insert(0, "SMILES query", smList3) - out_df2.drop_duplicates(inplace=True) - out_df3.drop_duplicates(inplace=True) - out_df2.insert(1, "SMILES target", InchiToSmiles(out_df2)) - out_df3.insert(1, "SMILES target", InchiToSmiles(out_df3)) - # out_df.drop_duplicates(inplace=True, subset=["InChI", "InChIKey", "Synonyms", "Molecular formula", "Major Isotope Mass", "ALogP"]) - out_df2.to_csv(ocsv_dup) - out_df3.to_csv(ocsv_dup2) -else: - # code = subprocess.run(executable + argv).returncode - # sys.exit(code) - subprocess.run(executable + argv) - smile = argv.pop(argv.index("-ismi") + 1) - tmp = pandas.DataFrame() - out = argv.pop(argv.index("-ocsv") + 1) - tmp = pandas.read_csv(out) # reads created output file - tmp.insert(0, "SMILES query", smile) # add SMILES string for query - tmp.insert(1, "SMILES target", InchiToSmiles(tmp)) # add SMILES string for target - tmp.to_csv(out) +out_df1.insert(0, "SMILES query", smList1) +out_df1.insert(1, "SMILES target", InchiToSmiles(out_df1)) +out_df1.to_csv(ocsv, sep ='\t') + +out_df2.insert(0, "SMILES query", smList2) +out_df2.insert(1, "SMILES target", InchiToSmiles(out_df2)) +out_df2.to_csv(ocsv_dup, sep ='\t') + +out_df3.insert(0, "SMILES query", smList3) +out_df3.insert(1, "SMILES target", InchiToSmiles(out_df3)) +out_df3.to_csv(ocsv_dup2, sep ='\t')