Mercurial > repos > recetox > biotransformer
changeset 1:362a66a3889c draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/biotransformer commit 833817386e90cca9ac4737e6857fcaf672f2a011"
author | recetox |
---|---|
date | Tue, 22 Sep 2020 14:42:15 +0000 |
parents | b5b62d04625a |
children | 3998017c374b |
files | biotransformer.xml wrapper_biotransformer.py |
diffstat | 2 files changed, 99 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/biotransformer.xml Thu Aug 27 12:55:38 2020 +0000 +++ b/biotransformer.xml Tue Sep 22 14:42:15 2020 +0000 @@ -1,10 +1,13 @@ -<tool id="biotransformer" name="BioTransformer" version="1.1.0"> - <requirements> - <container type="docker">registry.gitlab.ics.muni.cz:443/recetox/mass-spectrometry/biotransformer:1.1.0</container> - </requirements> - <command><![CDATA[ cd /biotransformer && python3 wrapper_biotransformer.py +<tool id="biotransformer" name="BioTransformer" version="1.1.5+galaxy0"> + <requirements> + <requirement type="package" version="1.1.5">biotransformer</requirement> + <requirement type="package" version="3.1.1">openbabel</requirement> + <requirement type="package" version="1.1.1">pandas</requirement> + <requirement type="package" version="3.7">python</requirement> + </requirements> + <command><![CDATA[ python3 '${__tool_directory__}/wrapper_biotransformer.py' -k $k - -b $b + -b $b -s $steps -t $tolerance -a @@ -20,7 +23,7 @@ #if $input.type_selector == "csv" -ocsv $output_file -ocsvDup $output_file2 - -ocsvDup2 $output_file3 + -ocsvDup2 $output_file3 #else if $input.type_selector == "smile" -ocsv $output_file #end if @@ -127,3 +130,4 @@ <citation type="doi">https://doi.org/10.1186/s13321-018-0324-5</citation> </citations> </tool> +
"""Run the ``biotransformer`` executable and post-process its CSV output.

Two modes are supported, selected by the command-line arguments:

* ``-icsv FILE -ocsv OUT -ocsvDup OUT2 -ocsvDup2 OUT3``: ``FILE`` is a
  header-less, single-column CSV of SMILES strings.  ``biotransformer`` is
  run once per row and the results are combined into three files: all
  results, results de-duplicated on six columns, and results de-duplicated
  on three columns.
* ``-ismi SMILES -ocsv OUT``: ``biotransformer`` is run once and ``OUT`` is
  rewritten in place with the extra columns described below.

Every other argument is passed to ``biotransformer`` unchanged.  Each output
gets a "SMILES query" column (the input SMILES) and a "SMILES target" column
(the SMILES derived from the "InChI" column of the result).
"""
import subprocess
import sys
import tempfile

import pandas
from openbabel import pybel


executable = ["biotransformer"]

# Column subsets used to build the two de-duplicated outputs.
DEDUP_SIX_COLUMNS = [
    "InChI",
    "InChIKey",
    "Synonyms",
    "Molecular formula",
    "Major Isotope Mass",
    "ALogP",
]
DEDUP_THREE_COLUMNS = ["Molecular formula", "Major Isotope Mass", "ALogP"]


def InchiToSmiles(df):
    """Return a list with the SMILES translation of every ``df['InChI']`` entry.

    NOTE(review): the string returned by pybel's ``write("smi")`` is stored
    unmodified; it appears to carry trailing whitespace after the SMILES --
    confirm whether downstream consumers expect it to be stripped.
    """
    return [pybel.readstring("inchi", inchi).write("smi") for inchi in df["InChI"]]


def _fail_missing(flag):
    """Report a missing option (or option value) on stderr and exit with 1."""
    sys.stderr.write("expected {} parameter\n".format(flag))
    sys.exit(1)


def _take_option(args, flag):
    """Remove *flag* and the value after it from *args*; return the value."""
    try:
        position = args.index(flag)
        value = args.pop(position + 1)
    except (ValueError, IndexError):
        _fail_missing(flag)
    del args[position]
    return value


def _option_value(args, flag):
    """Return the value following *flag* in *args* without modifying *args*."""
    try:
        return args[args.index(flag) + 1]
    except (ValueError, IndexError):
        _fail_missing(flag)


def _run_biotransformer(args):
    """Run biotransformer with *args*; exit with its status if it fails."""
    returncode = subprocess.run(executable + args).returncode
    if returncode != 0:
        # Stop here instead of failing later on a missing/empty result file.
        sys.stderr.write(
            "biotransformer exited with code {}\n".format(returncode)
        )
        sys.exit(returncode)


def _write_results(frames, path):
    """Combine per-query *frames*, add the target SMILES and write to *path*."""
    combined = pandas.concat(frames)
    combined.drop_duplicates(inplace=True)
    combined.insert(1, "SMILES target", InchiToSmiles(combined))
    # The (per-query) row index is written as the first column, as before.
    combined.to_csv(path)


def _process_csv(argv):
    """Run biotransformer for every SMILES listed in the ``-icsv`` file."""
    icsv = _take_option(argv, "-icsv")
    ocsv = _take_option(argv, "-ocsv")
    ocsv_dup = _take_option(argv, "-ocsvDup")
    ocsv_dup2 = _take_option(argv, "-ocsvDup2")

    all_results = []  # every row reported by biotransformer
    dedup_six = []  # rows unique on DEDUP_SIX_COLUMNS, per query
    dedup_three = []  # rows unique on DEDUP_THREE_COLUMNS, per query

    # The input must have exactly one column; the unpacking enforces that.
    for _, (smiles,) in pandas.read_csv(icsv, header=None).iterrows():
        with tempfile.NamedTemporaryFile() as out:
            _run_biotransformer(argv + ["-ismi", smiles, "-ocsv", out.name])
            # Parse the result once, while the temporary file still exists.
            result = pandas.read_csv(out.name)
        result.insert(0, "SMILES query", smiles)
        all_results.append(result)
        dedup_six.append(result.drop_duplicates(subset=DEDUP_SIX_COLUMNS))
        dedup_three.append(result.drop_duplicates(subset=DEDUP_THREE_COLUMNS))

    _write_results(all_results, ocsv)
    _write_results(dedup_six, ocsv_dup)
    _write_results(dedup_three, ocsv_dup2)


def _process_single(argv):
    """Run biotransformer once and annotate its ``-ocsv`` file in place."""
    smiles = _option_value(argv, "-ismi")
    out_path = _option_value(argv, "-ocsv")
    _run_biotransformer(argv)

    result = pandas.read_csv(out_path)  # reads created output file
    result.insert(0, "SMILES query", smiles)  # SMILES string of the query
    result.insert(1, "SMILES target", InchiToSmiles(result))  # and of the target
    result.to_csv(out_path)


def main():
    """Dispatch to CSV or single-SMILES mode based on the arguments."""
    argv = sys.argv[1:]
    if "-icsv" in argv:
        _process_csv(argv)
    else:
        _process_single(argv)


if __name__ == "__main__":
    main()