Mercurial > repos > bgruening > rdconf
comparison sdf_to_tab.py @ 0:55a2082540a9 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit c1d813d3f0fec60ea6efe8a11e59d98bfdc1636f"
author | bgruening |
---|---|
date | Sat, 04 Dec 2021 16:36:56 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:55a2082540a9 |
---|---|
1 #!/usr/bin/env python3 | |
2 import argparse | |
3 | |
4 import pandas as pd | |
5 from rdkit import Chem | |
6 | |
7 | |
def sdf_to_tab(vars):
    """Write the properties of every readable molecule in an SDF file as TSV.

    One output row is produced per molecule that the supplier could read;
    unreadable entries are reported on stdout and skipped.  Rows are indexed
    by the molecule's position in the input file and columns are written in
    sorted order.

    Args:
        vars: Parsed command-line namespace.  The attributes read here are
            ``inp`` (input SDF path), ``out`` (output path), ``props``
            (comma-separated property names, blank for all), ``header``
            (write the column names as first row), ``smiles`` (add a
            ``SMILES`` column) and ``name`` (add an ``SDFMoleculeName``
            column).  The parameter name shadows the builtin ``vars``; it is
            kept so existing callers are unaffected.
    """
    mols = Chem.SDMolSupplier(vars.inp, sanitize=False)

    # Parse the requested property names once instead of once per property.
    # ``None`` means "no filter given, keep everything that is TSV-safe".
    if vars.props.strip() == "":
        requested = None
    else:
        requested = set(vars.props.replace(" ", "").split(","))

    rows = []  # one dict per readable molecule
    for n, mol in enumerate(mols):
        if not mol:
            print("Molecule could not be read - skipped.")
            continue

        d = mol.GetPropsAsDict()
        if requested is None:
            # No explicit selection: drop values containing newlines or tabs.
            d = {
                prop: val
                for prop, val in d.items()
                if not any(x in str(val) for x in ("\n", "\t"))
            }
        else:
            # Keep only the properties requested via the CLI.
            d = {prop: val for prop, val in d.items() if prop in requested}

        if vars.name:
            d["SDFMoleculeName"] = mol.GetProp("_Name")
        if vars.smiles:
            d["SMILES"] = Chem.MolToSmiles(mol, isomericSmiles=False)
        d["Index"] = n
        rows.append(d)

    # Build the frame in a single step: DataFrame.append no longer exists in
    # pandas >= 2.0 and copied the whole frame on every call.  When nothing
    # was readable, still provide the "Index" column so that an empty table
    # is written instead of raising KeyError below.
    df = pd.DataFrame(rows) if rows else pd.DataFrame(columns=["Index"])

    df = df.astype({"Index": int}).set_index("Index")
    sorted_cols = sorted(df.columns.values.tolist())
    df.to_csv(vars.out, sep="\t", header=vars.header, columns=sorted_cols)
41 | |
42 | |
def main(argv=None):
    """Parse the command line and run the SDF-to-tabular conversion.

    Args:
        argv: Optional list of argument strings.  When ``None`` (the
            default) argparse reads ``sys.argv[1:]``, which is the behaviour
            of the original zero-argument entry point.
    """
    parser = argparse.ArgumentParser(description="Convert SDF to tabular")
    parser.add_argument("--inp", "-i", help="The input file", required=True)
    parser.add_argument("--out", "-o", help="The output file", required=True)
    # The help text promises that a blank value selects every property, so
    # omitting the option is treated the same as passing an empty string
    # rather than being rejected by argparse.
    parser.add_argument(
        "--props",
        "-p",
        default="",
        help="Properties to filter (leave blank for all)",
    )
    parser.add_argument(
        "--header",
        "-t",
        action="store_true",
        help="Write property name as the first row.",
    )
    parser.add_argument(
        "--smiles", "-s", action="store_true", help="Include SMILES in output."
    )
    parser.add_argument(
        "--name", "-n", action="store_true", help="Include molecule name in output."
    )
    sdf_to_tab(parser.parse_args(argv))
66 | |
67 | |
# Run the command-line interface only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()