Mercurial > repos > bgruening > molecule2gspan
changeset 1:5f97004c7f57 draft
Uploaded
author | bgruening |
---|---|
date | Tue, 15 Apr 2014 12:44:17 -0400 |
parents | 18eb78773d87 |
children | ec22d245d224 |
files | mol2gspan.py mol2gspan.xml repository_dependencies.xml tool_dependencies.xml |
diffstat | 4 files changed, 47 insertions(+), 44 deletions(-) [+] |
line wrap: on
line diff
--- a/mol2gspan.py Tue Oct 29 11:15:59 2013 -0400 +++ b/mol2gspan.py Tue Apr 15 12:44:17 2014 -0400 @@ -4,45 +4,53 @@ Converts a SD-file to a GSPAN file. """ - -import os, sys +import os +import sys import argparse +import openbabel +import pybel def main( args ): - begin = False - iid = 0 - graph_counter = 1 + for infile in args.infile: + file_extension = args.format or os.path.splitext( infile )[-1].lstrip('.') + + if not args.format and file_extension not in ['smi', 'sdf', 'inchi', 'mol']: + sys.exit('Could not guess the format from the file extension please specify with the --format option.') + + molecules = pybel.readfile(file_extension, infile) + for mol in molecules: + args.outfile.write( 't # id %s\n' % mol.title.strip() ) + for atom in openbabel.OBMolAtomIter( mol.OBMol): + label = atom.GetAtomicNum() + vertex_index = atom.GetIdx() + args.outfile.write('v %s %s\n' % (vertex_index, label)) - for line in args.infile: - if line.rstrip(): - if line.strip().endswith('END'): - begin = False - elif line.strip() == '$$$$': - graph_counter += 1 - iid = 0 - else: - # found header line, like: 21 21 0 0 0 0 0 0 0 0999 V2000 - if len(line.split()) >= 5 and line.split()[-1] == 'V2000': - args.outfile.write('t # id %s\n' % graph_counter) - begin=True - continue - # connection or coordinate/atom table - if len(line.split()) >= 4 and begin: - # coordinate/atom table - if not line.startswith('M'): - if line.split()[3].isalpha() or line.split()[3] == '*': - args.outfile.write( 'v %s %s \n' % (iid, line.split()[3]) ) - iid += 1 - else: - #connection table - id, node, edge, trash = line.split(None, 3) - args.outfile.write( 'e %s %s %s\n' % ( int(id) - 1 , int(node) -1, edge ) ) + for bond in openbabel.OBMolBondIter( mol.OBMol): + src_index = bond.GetBeginAtomIdx() + dest_index = bond.GetEndAtomIdx() + assert(src_index > 0) + assert(dest_index > 0) + if bond.IsAromatic(): + label = 'a' + elif bond.IsSingle(): + label = 's' + elif bond.IsDouble(): + label = 'd' + elif 
bond.IsTriple(): + label = 't' + atom1 = bond.GetBeginAtom() + atom2 = bond.GetEndAtom() + args.outfile.write('e %s %s %s\n' % (src_index, dest_index, label)) + + if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('--infile', nargs='?', type=argparse.FileType('r'), - default=sys.stdin, help="Specify one or more input files") + parser.add_argument('-i', '--infile', nargs='*', + help="Specify one or more input files") + parser.add_argument('-f', '--format', + help="Format of the input file.") parser.add_argument('--outfile', type=argparse.FileType('w'), default=sys.stdout, help="Specify one output file") args = parser.parse_args()
--- a/mol2gspan.xml Tue Oct 29 11:15:59 2013 -0400
+++ b/mol2gspan.xml Tue Apr 15 12:44:17 2014 -0400
@@ -1,20 +1,15 @@
-<tool id="bg_mol2gspan" name="Molecule to gSpan" version="0.1">
+<tool id="bg_mol2gspan" name="Molecule to gSpan" version="0.2">
     <description>converter</description>
     <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10" shared_inputs="" merge_outputs="outfile"></parallelism>
     <requirements>
         <requirement type="package" version="2.3.2">openbabel</requirement>
         <requirement type="set_environment">MOL2GSPAN_SCRIPT_PATH</requirement>
     </requirements>
-    <command>
-
-        #if $infile.ext == 'sdf':
-            python \$MOL2GSPAN_SCRIPT_PATH/mol2gspan.py --infile $infile --outfile $outfile
-
-        #else:
-            obabel -i $infile.ext -o sdf $infile ---errorlevel 1 | python \$MOL2GSPAN_SCRIPT_PATH/mol2gspan.py --infile - --outfile $outfile
-
-        #end if
-
+    <command interpreter="python">
+        mol2gspan.py
+            --infile $infile
+            --outfile $outfile
+            --format $infile.ext
     </command>
     <inputs>
         <param format="smi,sdf,mol,inchi,mol2" name="infile" type="data"
--- a/repository_dependencies.xml Tue Oct 29 11:15:59 2013 -0400
+++ b/repository_dependencies.xml Tue Apr 15 12:44:17 2014 -0400
@@ -1,5 +1,5 @@
 <?xml version="1.0"?>
 <repositories description="This requires the Molecule datatype definitions (e.g. SMILES, InChI, SD-format) and the EDeN datatypes.">
     <repository changeset_revision="8714f927a6ee" name="molecule_datatypes" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
-    <repository changeset_revision="59b3b6ce10bb" name="eden_toolbox" owner="bgruening" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="9262f801d739" name="eden_toolbox" owner="bgruening" toolshed="http://testtoolshed.g2.bx.psu.edu" />
 </repositories>
--- a/tool_dependencies.xml Tue Oct 29 11:15:59 2013 -0400
+++ b/tool_dependencies.xml Tue Apr 15 12:44:17 2014 -0400
@@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <tool_dependency>
     <package name="openbabel" version="2.3.2">
-        <repository changeset_revision="7601c962048a" name="package_openbabel_2_3" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+        <repository changeset_revision="8426be5149ce" name="package_openbabel_2_3" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
     </package>
     <set_environment version="1.0">
         <environment_variable action="set_to" name="MOL2GSPAN_SCRIPT_PATH">$REPOSITORY_INSTALL_DIR</environment_variable>