Mercurial > repos > bgruening > rdconf
diff sdf_to_tab.py @ 0:55a2082540a9 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
author | bgruening |
---|---|
date | Sat, 04 Dec 2021 16:36:56 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sdf_to_tab.py Sat Dec 04 16:36:56 2021 +0000 @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +import argparse + +import pandas as pd +from rdkit import Chem + + +def sdf_to_tab(vars): + mols = Chem.SDMolSupplier(vars.inp, sanitize=False) + df = pd.DataFrame() # for output + + for n in range(len(mols)): + if mols[n]: + d = mols[n].GetPropsAsDict() + # filter dict for desired props + if vars.props.strip() == "": # none specified, return all + d = { + prop: val + for (prop, val) in d.items() + if not any(x in str(val) for x in ["\n", "\t"]) + } # remove items containing newlines or tabs + else: + d = { + prop: val + for (prop, val) in d.items() + if prop in vars.props.replace(" ", "").split(",") + } # remove items not requested via CLI + if vars.name: + d["SDFMoleculeName"] = mols[n].GetProp("_Name") + if vars.smiles: + d["SMILES"] = Chem.MolToSmiles(mols[n], isomericSmiles=False) + d["Index"] = int(n) + + df = df.append(d, ignore_index=True) + else: + print("Molecule could not be read - skipped.") + + df = df.astype({"Index": int}).set_index("Index") + sorted_cols = sorted(df.columns.values.tolist()) + df.to_csv(vars.out, sep="\t", header=vars.header, columns=sorted_cols) + + +def main(): + parser = argparse.ArgumentParser(description="Convert SDF to tabular") + parser.add_argument("--inp", "-i", help="The input file", required=True) + parser.add_argument("--out", "-o", help="The output file", required=True) + parser.add_argument( + "--props", + "-p", + help="Properties to filter (leave blank for all)", + required=True, + ) + parser.add_argument( + "--header", + "-t", + action="store_true", + help="Write property name as the first row.", + ) + parser.add_argument( + "--smiles", "-s", action="store_true", help="Include SMILES in output." + ) + parser.add_argument( + "--name", "-n", action="store_true", help="Include molecule name in output." + ) + sdf_to_tab(parser.parse_args()) + + +if __name__ == "__main__": + main()