# HG changeset patch # User bgruening # Date 1397580257 14400 # Node ID 5f97004c7f57a8454a4b982f58bec35805059536 # Parent 18eb78773d87bb814848708e713ad21d748b1083 Uploaded diff -r 18eb78773d87 -r 5f97004c7f57 mol2gspan.py --- a/mol2gspan.py Tue Oct 29 11:15:59 2013 -0400 +++ b/mol2gspan.py Tue Apr 15 12:44:17 2014 -0400 @@ -4,45 +4,53 @@ Converts a SD-file to a GSPAN file. """ - -import os, sys +import os +import sys import argparse +import openbabel +import pybel def main( args ): - begin = False - iid = 0 - graph_counter = 1 + for infile in args.infile: + file_extension = args.format or os.path.splitext( infile )[-1].lstrip('.') + + if not args.format and file_extension not in ['smi', 'sdf', 'inchi', 'mol']: + sys.exit('Could not guess the format from the file extension please specify with the --format option.') + + molecules = pybel.readfile(file_extension, infile) + for mol in molecules: + args.outfile.write( 't # id %s\n' % mol.title.strip() ) + for atom in openbabel.OBMolAtomIter( mol.OBMol): + label = atom.GetAtomicNum() + vertex_index = atom.GetIdx() + args.outfile.write('v %s %s\n' % (vertex_index, label)) - for line in args.infile: - if line.rstrip(): - if line.strip().endswith('END'): - begin = False - elif line.strip() == '$$$$': - graph_counter += 1 - iid = 0 - else: - # found header line, like: 21 21 0 0 0 0 0 0 0 0999 V2000 - if len(line.split()) >= 5 and line.split()[-1] == 'V2000': - args.outfile.write('t # id %s\n' % graph_counter) - begin=True - continue - # connection or coordinate/atom table - if len(line.split()) >= 4 and begin: - # coordinate/atom table - if not line.startswith('M'): - if line.split()[3].isalpha() or line.split()[3] == '*': - args.outfile.write( 'v %s %s \n' % (iid, line.split()[3]) ) - iid += 1 - else: - #connection table - id, node, edge, trash = line.split(None, 3) - args.outfile.write( 'e %s %s %s\n' % ( int(id) - 1 , int(node) -1, edge ) ) + for bond in openbabel.OBMolBondIter( mol.OBMol): + src_index = bond.GetBeginAtomIdx() + dest_index = bond.GetEndAtomIdx() + assert(src_index > 0) + assert(dest_index > 0) + if bond.IsAromatic(): + label = 'a' + elif bond.IsSingle(): + label = 's' + elif bond.IsDouble(): + label = 'd' + elif bond.IsTriple(): + label = 't' + atom1 = bond.GetBeginAtom() + atom2 = bond.GetEndAtom() + args.outfile.write('e %s %s %s\n' % (src_index, dest_index, label)) + + if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('--infile', nargs='?', type=argparse.FileType('r'), - default=sys.stdin, help="Specify one or more input files") + parser.add_argument('-i', '--infile', nargs='*', + help="Specify one or more input files") + parser.add_argument('-f', '--format', + help="Format of the input file.") parser.add_argument('--outfile', type=argparse.FileType('w'), default=sys.stdout, help="Specify one output file") args = parser.parse_args() diff -r 18eb78773d87 -r 5f97004c7f57 mol2gspan.xml --- a/mol2gspan.xml Tue Oct 29 11:15:59 2013 -0400 +++ b/mol2gspan.xml Tue Apr 15 12:44:17 2014 -0400 @@ -1,20 +1,15 @@ - + converter openbabel MOL2GSPAN_SCRIPT_PATH - - - #if $infile.ext == 'sdf': - python \$MOL2GSPAN_SCRIPT_PATH/mol2gspan.py --infile $infile --outfile $outfile - - #else: - obabel -i $infile.ext -o sdf $infile ---errorlevel 1 | python \$MOL2GSPAN_SCRIPT_PATH/mol2gspan.py --infile - --outfile $outfile - - #end if - + + mol2gspan.py + --infile $infile + --outfile $outfile + --format $infile.ext - + diff -r 18eb78773d87 -r 5f97004c7f57 tool_dependencies.xml --- a/tool_dependencies.xml Tue Oct 29 11:15:59 2013 -0400 +++ b/tool_dependencies.xml Tue Apr 15 12:44:17 2014 -0400 @@ -1,7 +1,7 @@ - + $REPOSITORY_INSTALL_DIR