changeset 1:5f97004c7f57 draft

Uploaded
author bgruening
date Tue, 15 Apr 2014 12:44:17 -0400
parents 18eb78773d87
children ec22d245d224
files mol2gspan.py mol2gspan.xml repository_dependencies.xml tool_dependencies.xml
diffstat 4 files changed, 47 insertions(+), 44 deletions(-) [+]
line wrap: on
line diff
--- a/mol2gspan.py	Tue Oct 29 11:15:59 2013 -0400
+++ b/mol2gspan.py	Tue Apr 15 12:44:17 2014 -0400
@@ -4,45 +4,53 @@
     Converts a SD-file to a GSPAN file.
 """
 
-
-import os, sys
+import os
+import sys
 import argparse
+import openbabel
+import pybel
 
 def main( args ):
 
-    begin = False
-    iid = 0
-    graph_counter = 1
+    for infile in args.infile:
+        file_extension = args.format or os.path.splitext( infile )[-1].lstrip('.')
+        # An explicit --format wins; otherwise only a known extension is trusted as the format.
+        if not args.format and file_extension not in ['smi', 'sdf', 'inchi', 'mol', 'mol2']:
+            sys.exit('Could not guess the format from the file extension please specify with the --format option.')
+        # Emit one gSpan graph per molecule: a 't' header line, then one 'v' line per atom.
+        molecules = pybel.readfile(file_extension, infile)
+        for mol in molecules:
+            args.outfile.write( 't # id %s\n' % mol.title.strip() )
+            for atom in openbabel.OBMolAtomIter( mol.OBMol):
+                label = atom.GetAtomicNum()  # vertex label is the atomic number
+                vertex_index = atom.GetIdx()
+                args.outfile.write('v %s %s\n' % (vertex_index, label))
 
-    for line in args.infile:
-        if line.rstrip():
-            if line.strip().endswith('END'):
-                begin = False
-            elif line.strip() == '$$$$':
-                graph_counter += 1
-                iid = 0
-            else:
-                # found header line, like:  21 21  0  0  0  0  0  0  0  0999 V2000
-                if len(line.split()) >= 5 and line.split()[-1] == 'V2000':
-                    args.outfile.write('t # id %s\n' % graph_counter)
-                    begin=True
-                    continue
-                # connection or coordinate/atom table
-                if len(line.split()) >= 4 and begin:
-                    # coordinate/atom table
-                    if not line.startswith('M'):
-                        if line.split()[3].isalpha() or line.split()[3] == '*':
-                            args.outfile.write( 'v %s %s \n' % (iid, line.split()[3]) )
-                            iid += 1
-                        else:
-                            #connection table
-                            id, node, edge, trash = line.split(None, 3)
-                            args.outfile.write( 'e %s %s %s\n' % ( int(id) - 1 , int(node) -1, edge ) )
+            for bond in openbabel.OBMolBondIter( mol.OBMol):  # one 'e' line per bond
+                src_index = bond.GetBeginAtomIdx()
+                dest_index = bond.GetEndAtomIdx()
+                assert(src_index > 0)  # sanity check: endpoints are expected to be positive atom indices
+                assert(dest_index > 0)
+                if bond.IsAromatic():
+                    label = 'a'
+                elif bond.IsSingle():
+                    label = 's'
+                elif bond.IsDouble():
+                    label = 'd'
+                elif bond.IsTriple():
+                    label = 't'
+                else:  # any other bond order: never fall through with a stale label
+                    label = str(bond.GetBondOrder())
+                args.outfile.write('e %s %s %s\n' % (src_index, dest_index, label))
+
+
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('--infile', nargs='?', type=argparse.FileType('r'),
-        default=sys.stdin, help="Specify one or more input files")
+    parser.add_argument('-i', '--infile', nargs='*',
+        help="Specify one or more input files")
+    parser.add_argument('-f', '--format',
+        help="Format of the input file.")
     parser.add_argument('--outfile', type=argparse.FileType('w'),
         default=sys.stdout, help="Specify one output file")
     args = parser.parse_args()
--- a/mol2gspan.xml	Tue Oct 29 11:15:59 2013 -0400
+++ b/mol2gspan.xml	Tue Apr 15 12:44:17 2014 -0400
@@ -1,20 +1,15 @@
-<tool id="bg_mol2gspan" name="Molecule to gSpan" version="0.1">
+<tool id="bg_mol2gspan" name="Molecule to gSpan" version="0.2">
     <description>converter</description>
     <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10" shared_inputs="" merge_outputs="outfile"></parallelism>
     <requirements>
         <requirement type="package" version="2.3.2">openbabel</requirement>
         <requirement type="set_environment">MOL2GSPAN_SCRIPT_PATH</requirement>
     </requirements>
-    <command>
-
-        #if $infile.ext == 'sdf':
-            python \$MOL2GSPAN_SCRIPT_PATH/mol2gspan.py --infile $infile --outfile $outfile
-
-        #else:
-            obabel -i $infile.ext -o sdf $infile ---errorlevel 1 | python \$MOL2GSPAN_SCRIPT_PATH/mol2gspan.py --infile - --outfile $outfile
-
-        #end if
-
+    <command interpreter="python">
+        mol2gspan.py 
+            --infile $infile
+            --outfile $outfile
+            --format $infile.ext
     </command>
     <inputs>
         <param format="smi,sdf,mol,inchi,mol2" name="infile" type="data" 
--- a/repository_dependencies.xml	Tue Oct 29 11:15:59 2013 -0400
+++ b/repository_dependencies.xml	Tue Apr 15 12:44:17 2014 -0400
@@ -1,5 +1,5 @@
 <?xml version="1.0"?>
 <repositories description="This requires the Molecule datatype definitions (e.g. SMILES, InChI, SD-format) and the EDeN datatypes.">
     <repository changeset_revision="8714f927a6ee" name="molecule_datatypes" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
-    <repository changeset_revision="59b3b6ce10bb" name="eden_toolbox" owner="bgruening" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="9262f801d739" name="eden_toolbox" owner="bgruening" toolshed="http://testtoolshed.g2.bx.psu.edu" />
 </repositories>
--- a/tool_dependencies.xml	Tue Oct 29 11:15:59 2013 -0400
+++ b/tool_dependencies.xml	Tue Apr 15 12:44:17 2014 -0400
@@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <tool_dependency>
     <package name="openbabel" version="2.3.2">
-        <repository changeset_revision="7601c962048a" name="package_openbabel_2_3" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+        <repository changeset_revision="8426be5149ce" name="package_openbabel_2_3" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
     </package>
     <set_environment version="1.0">
         <environment_variable action="set_to" name="MOL2GSPAN_SCRIPT_PATH">$REPOSITORY_INSTALL_DIR</environment_variable>