changeset 0:efd9c309b8f7

Uploaded
author bgruening
date Tue, 26 Mar 2013 14:50:53 -0400
parents
children 4a71d98176f6
files osra.xml readme repository_dependencies.xml test_data/CID_2244.png test_data/CID_2244.sdf tool_dependencies.xml
diffstat 6 files changed, 425 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/osra.xml	Tue Mar 26 14:50:53 2013 -0400
@@ -0,0 +1,38 @@
+<tool id="osra" name="Molecule recognition" version="0.2">
+    <!-- Galaxy wrapper around the osra command line tool: reads a PNG or PDF dataset, writes the detected molecules. -->
+    <description>in Images or PDF documents</description>
+    <requirements>
+        <requirement type="package" version="1.4.0">osra</requirement>
+    </requirements>
+    <command>osra -f $oformat "$infile" > "$outfile"</command>
+    <inputs>
+        <param name="infile" type="data" format="png,pdf" label="Image or PDF with molecules"/>
+        <param name="oformat" type="select" label="Output molecule format">
+            <option value="can">SMILES</option>
+            <option value="sdf">SDF</option>
+        </param>
+    </inputs>
+    <outputs>
+        <!-- The output is typed as sdf unless the user selects "can" (SMILES), in which case it is switched to smi. -->
+        <data name="outfile" format="sdf">
+            <change_format>
+                <when input="oformat" value="can" format="smi"/>
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <!-- NOTE(review): osra_on_CID2244.sdf is not among the files added by this changeset; confirm it ships with the test data. -->
+            <param name="infile" ftype="png" value="CID_2244.png"/>
+            <param name="oformat" value="sdf"/>
+            <output name="outfile" ftype="sdf" file="osra_on_CID2244.sdf"/>
+        </test>
+    </tests>
+    <help>
+
+**What it does**
+
+OSRA detects molecules in images and converts them to standard molecule formats.
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/readme	Tue Mar 26 14:50:53 2013 -0400
@@ -0,0 +1,165 @@
+OSRA: Optical Structure Recognition Application
+
+OSRA is a utility designed to convert graphical representations of chemical 
+structures, as they appear in journal articles, patent documents, textbooks, 
+trade magazines etc., into SMILES (Simplified Molecular Input Line Entry 
+Specification - see http://en.wikipedia.org/wiki/SMILES) or 
+SD files - a computer recognizable molecular structure format. 
+OSRA can read a document in any of the over 90 graphical formats parseable by 
+ImageMagick - including GIF, JPEG, PNG, TIFF, PDF, PS etc., and generate 
+the SMILES or SDF representation of the molecular structure images encountered 
+within that document.
+
+Note that any software designed for optical recognition is unlikely to be 
+perfect, and the output produced might, and probably will, contain errors, 
+so curation by a human knowledgeable in chemical structures is highly recommended.
+
+http://cactus.nci.nih.gov/osra/
+
+
+The wrapper comes with an automatic installation of all dependencies through the
+Galaxy Tool Shed. If you do not want to use that feature, you can compile OSRA
+with the following instructions.
+
+
+########################################
+#                                      #
+#           OSRA Setup                 #
+# currently a small patch is required  #
+#                                      #
+########################################
+
+Save the following patch as 'osra-1.4.0-configure.patch' and store it next
+to the script below.
+
+-------------------------------------------------------
+--- configure	2013-03-18 18:14:00.888605219 +0100
++++ configure_ori	2013-03-18 18:13:48.396605759 +0100
+@@ -4178,7 +4178,8 @@
+ 
+ # Check whether --with-tclap-include was given.
+ if test "${with_tclap_include+set}" = set; then :
+-  withval=$with_tclap_include;
++    withval=$with_tclap_include;
++    CPPFLAGS="-I${withval} ${CPPFLAGS}"
+ else
+   with_tclap_include="auto"
+ fi
+-------------------------------------------------------
+
+#!/usr/bin/env bash
+# OSRA installation: builds OSRA 1.4.0 and its dependencies below the current directory.
+
+cdir=$(pwd)
+# GraphicsMagick 1.3.18, installed into GraphicsMagick-1.3.18/build
+wget http://sourceforge.net/projects/graphicsmagick/files/graphicsmagick/1.3.18/GraphicsMagick-1.3.18.tar.gz
+tar xfvz GraphicsMagick-1.3.18.tar.gz
+cd GraphicsMagick-1.3.18
+./configure --prefix="$(pwd)/build" --enable-shared=yes
+make
+make install
+cd ..
+# The GraphicsMagick binaries built above must be on PATH when OSRA is configured further down.
+export PATH="$PATH:$cdir/GraphicsMagick-1.3.18/build/bin/"
+# potrace 1.11, built together with libpotrace
+wget http://potrace.sourceforge.net/download/potrace-1.11.tar.gz
+tar xfvz potrace-1.11.tar.gz
+cd potrace-1.11
+./configure --with-libpotrace --prefix="$(pwd)/build"
+make
+make install
+cd ..
+# gocr 0.50pre, the patched copy hosted by the OSRA project
+wget http://downloads.sourceforge.net/project/osra/gocr-patched/gocr-0.50pre-patched.tgz
+tar xfvz gocr-0.50pre-patched.tgz
+cd gocr-0.50pre-patched
+./configure --prefix="$(pwd)/build"
+make libs
+make all install
+cd ..
+# tclap 1.2.1
+wget http://downloads.sourceforge.net/project/tclap/tclap-1.2.1.tar.gz
+tar xfvz tclap-1.2.1.tar.gz
+cd tclap-1.2.1
+./configure --prefix="$(pwd)/build"
+make
+make install
+cd ..
+# ocrad 0.21
+wget http://mirror.checkdomain.de/gnu/ocrad/ocrad-0.21.tar.gz
+tar xfvz ocrad-0.21.tar.gz
+cd ocrad-0.21
+./configure --prefix="$(pwd)/build"
+make
+make install
+cd ..
+# Open Babel 2.3.2: CMake build in build/, installed into build/install
+wget http://downloads.sourceforge.net/project/openbabel/openbabel/2.3.2/openbabel-2.3.2.tar.gz
+tar xfvz openbabel-2.3.2.tar.gz
+cd openbabel-2.3.2
+mkdir build
+cd build
+cmake .. -DCMAKE_INSTALL_PREFIX="$(pwd)/install/"
+make
+make install
+cd ../..
+# tesseract is currently not built; the steps are kept for reference:
+#wget http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.02.tar.gz
+#tar xfvz tesseract-ocr-3.02.02.tar.gz
+#cd tesseract-ocr
+#./configure --prefix=`pwd`/build
+#make
+#make install
+#cd ..
+
+# cuneiform 1.1.0: CMake build in build/, installed into build/install
+wget https://launchpad.net/cuneiform-linux/1.1/1.1/+download/cuneiform-linux-1.1.0.tar.bz2
+tar xfvj cuneiform-linux-1.1.0.tar.bz2
+cd cuneiform-linux-1.1.0
+mkdir build
+cd build
+cmake .. -DCMAKE_INSTALL_PREFIX="$(pwd)/install/"
+make
+make install
+cd ../..
+
+# OSRA 1.4.0 itself: patch its configure script, then build against the libraries installed above
+wget http://downloads.sourceforge.net/project/osra/osra/1.4.0/osra-1.4.0.tgz
+tar xfvz osra-1.4.0.tgz
+patch osra-1.4.0/configure < osra-1.4.0-configure.patch
+cd osra-1.4.0
+
+./configure --with-tclap-include="$cdir/tclap-1.2.1/build/include/" \
+--with-potrace-include="$cdir/potrace-1.11/build/include/" \
+--with-potrace-lib="$cdir/potrace-1.11/build/lib/" \
+--with-gocr-include="$cdir/gocr-0.50pre-patched/build/include/gocr/" \
+--with-gocr-lib="$cdir/gocr-0.50pre-patched/build/lib/" \
+--with-ocrad-include="$cdir/ocrad-0.21/build/include/" \
+--with-ocrad-lib="$cdir/ocrad-0.21/build/lib/" \
+--with-cuneiform-include="$cdir/cuneiform-linux-1.1.0/build/install/include/" \
+--with-cuneiform \
+--with-cuneiform-lib="$cdir/cuneiform-linux-1.1.0/build/install/lib/" \
+--with-openbabel-include="$cdir/openbabel-2.3.2/build/install/include/openbabel-2.0/" \
+--with-openbabel-lib="$cdir/openbabel-2.3.2/build/install/lib/" \
+--with-graphicsmagick-lib="$cdir/GraphicsMagick-1.3.18/build/lib/" \
+--with-graphicsmagick-include="$cdir/GraphicsMagick-1.3.18/build/include/GraphicsMagick/" \
+--prefix="$(pwd)/bin/"
+#--enable-static-linking --disable-graphicsmagick-config
+make
+make install
+# Run the freshly built binary; the shared libraries built above have to be on the loader path.
+export LD_LIBRARY_PATH="$cdir/GraphicsMagick-1.3.18/build/lib/:$cdir/potrace-1.11/build/lib/:$cdir/openbabel-2.3.2/build/install/lib/:$cdir/cuneiform-linux-1.1.0/build/install/lib/"
+./bin/bin/osra
+
+
+#--with-tesseract-include=$cdir/tclap-1.2.1/build/include/
+#--with-tesseract
+#--with-tesseract-lib
+
+
+
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml	Tue Mar 26 14:50:53 2013 -0400
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<repositories description="This requires the Molecule datatype definitions (e.g. SMILES, InChI, SD-format).">
+    <repository name="molecule_datatypes" owner="bgruening" toolshed="http://testtoolshed.g2.bx.psu.edu/" changeset_revision="7cb4c02f61e6"/> <!-- datatypes repository, pinned to a single revision -->
+</repositories>
Binary file test_data/CID_2244.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test_data/CID_2244.sdf	Tue Mar 26 14:50:53 2013 -0400
@@ -0,0 +1,155 @@
+2244
+  -OEChem-05151212332D
+
+ 21 21  0     0  0  0  0  0  0999 V2000
+    3.7320   -0.0600    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    6.3301    1.4400    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    4.5981    1.4400    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    2.8660   -1.5600    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+    4.5981   -0.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    5.4641   -0.0600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    4.5981   -1.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    6.3301   -0.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    5.4641   -2.0600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    6.3301   -1.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    5.4641    0.9400    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.8660   -0.5600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    2.0000   -0.0600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+    4.0611   -1.8700    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    6.8671   -0.2500    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    5.4641   -2.6800    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    6.8671   -1.8700    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    2.3100    0.4769    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    1.4631    0.2500    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    1.6900   -0.5969    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+    6.3301    2.0600    0.0000 H   0  0  0  0  0  0  0  0  0  0  0  0
+  1  5  1  0  0  0  0
+  1 12  1  0  0  0  0
+  2 11  1  0  0  0  0
+  2 21  1  0  0  0  0
+  3 11  2  0  0  0  0
+  4 12  2  0  0  0  0
+  5  6  1  0  0  0  0
+  5  7  2  0  0  0  0
+  6  8  2  0  0  0  0
+  6 11  1  0  0  0  0
+  7  9  1  0  0  0  0
+  7 14  1  0  0  0  0
+  8 10  1  0  0  0  0
+  8 15  1  0  0  0  0
+  9 10  2  0  0  0  0
+  9 16  1  0  0  0  0
+ 10 17  1  0  0  0  0
+ 12 13  1  0  0  0  0
+ 13 18  1  0  0  0  0
+ 13 19  1  0  0  0  0
+ 13 20  1  0  0  0  0
+M  END
+> <PUBCHEM_COMPOUND_CID>
+2244
+
+> <PUBCHEM_COMPOUND_CANONICALIZED>
+1
+
+> <PUBCHEM_CACTVS_COMPLEXITY>
+212
+
+> <PUBCHEM_CACTVS_HBOND_ACCEPTOR>
+4
+
+> <PUBCHEM_CACTVS_HBOND_DONOR>
+1
+
+> <PUBCHEM_CACTVS_ROTATABLE_BOND>
+3
+
+> <PUBCHEM_CACTVS_SUBSKEYS>
+AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA==
+
+> <PUBCHEM_IUPAC_OPENEYE_NAME>
+2-acetoxybenzoic acid
+
+> <PUBCHEM_IUPAC_CAS_NAME>
+2-acetyloxybenzoic acid
+
+> <PUBCHEM_IUPAC_NAME>
+2-acetyloxybenzoic acid
+
+> <PUBCHEM_IUPAC_SYSTEMATIC_NAME>
+2-acetyloxybenzoic acid
+
+> <PUBCHEM_IUPAC_TRADITIONAL_NAME>
+2-acetoxybenzoic acid
+
+> <PUBCHEM_IUPAC_INCHI>
+InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)
+
+> <PUBCHEM_IUPAC_INCHIKEY>
+BSYNRYMUTXBXSQ-UHFFFAOYSA-N
+
+> <PUBCHEM_XLOGP3>
+1.2
+
+> <PUBCHEM_EXACT_MASS>
+180.042259
+
+> <PUBCHEM_MOLECULAR_FORMULA>
+C9H8O4
+
+> <PUBCHEM_MOLECULAR_WEIGHT>
+180.15742
+
+> <PUBCHEM_OPENEYE_CAN_SMILES>
+CC(=O)OC1=CC=CC=C1C(=O)O
+
+> <PUBCHEM_OPENEYE_ISO_SMILES>
+CC(=O)OC1=CC=CC=C1C(=O)O
+
+> <PUBCHEM_CACTVS_TPSA>
+63.6
+
+> <PUBCHEM_MONOISOTOPIC_WEIGHT>
+180.042259
+
+> <PUBCHEM_TOTAL_CHARGE>
+0
+
+> <PUBCHEM_HEAVY_ATOM_COUNT>
+13
+
+> <PUBCHEM_ATOM_DEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_ATOM_UDEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_BOND_DEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_BOND_UDEF_STEREO_COUNT>
+0
+
+> <PUBCHEM_ISOTOPIC_ATOM_COUNT>
+0
+
+> <PUBCHEM_COMPONENT_COUNT>
+1
+
+> <PUBCHEM_CACTVS_TAUTO_COUNT>
+1
+
+> <PUBCHEM_COORDINATE_TYPE>
+1
+5
+255
+
+> <PUBCHEM_BONDANNOTATIONS>
+5  6  8
+5  7  8
+6  8  8
+7  9  8
+8  10  8
+9  10  8
+
+$$$$
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Tue Mar 26 14:50:53 2013 -0400
@@ -0,0 +1,63 @@
+<tool_dependency>
+    <package name="osra" version="1.4.0">
+        <install version="1.0">
+            <actions>
+                <!-- Builds OSRA 1.4.0 from source, together with the third-party libraries it is configured against, below $INSTALL_DIR. -->
+                <action type="download_by_url">http://downloads.sourceforge.net/project/osra/osra/1.4.0/osra-1.4.0.tgz</action>
+                <!--compiling GraphicsMagick-1.3.18 -->
+                <action type="shell_command">wget http://sourceforge.net/projects/graphicsmagick/files/graphicsmagick/1.3.18/GraphicsMagick-1.3.18.tar.gz</action>
+                <action type="shell_command">tar xfvz GraphicsMagick-1.3.18.tar.gz &amp;&amp; cd GraphicsMagick-1.3.18 &amp;&amp; ./configure --prefix=$INSTALL_DIR/gmagick/build --enable-shared=yes &amp;&amp; make &amp;&amp; make install</action>
+
+                <!--compiling potrace-1.11 -->
+                <action type="shell_command">wget http://potrace.sourceforge.net/download/potrace-1.11.tar.gz</action>
+                <action type="shell_command">tar xfvz potrace-1.11.tar.gz &amp;&amp; cd potrace-1.11 &amp;&amp; ./configure --with-libpotrace --prefix=$INSTALL_DIR/potrace/build &amp;&amp; make &amp;&amp; make install</action>
+
+                <!--compiling gocr 0.50patched -->
+                <action type="shell_command">wget http://downloads.sourceforge.net/project/osra/gocr-patched/gocr-0.50pre-patched.tgz</action>
+                <action type="shell_command">tar xfvz gocr-0.50pre-patched.tgz &amp;&amp; cd gocr-0.50pre-patched &amp;&amp; ./configure --prefix=$INSTALL_DIR/gocr/build &amp;&amp; make libs &amp;&amp; make all install</action>
+
+                <!--compiling tclap 1.2.1 -->
+                <action type="shell_command">wget http://downloads.sourceforge.net/project/tclap/tclap-1.2.1.tar.gz</action>
+                <action type="shell_command">tar xfvz tclap-1.2.1.tar.gz &amp;&amp; cd tclap-1.2.1 &amp;&amp; ./configure --prefix=$INSTALL_DIR/tclap/build &amp;&amp; make &amp;&amp; make install</action>
+
+                <!--compiling ocrad 0.21 -->
+                <action type="shell_command">wget http://mirror.checkdomain.de/gnu/ocrad/ocrad-0.21.tar.gz</action>
+                <action type="shell_command">tar xfvz ocrad-0.21.tar.gz &amp;&amp; cd ocrad-0.21 &amp;&amp; ./configure --prefix=$INSTALL_DIR/ocrad/build &amp;&amp; make &amp;&amp; make install</action>
+
+                <!--compiling openbabel 2.3.2 -->
+                <action type="shell_command">wget http://downloads.sourceforge.net/project/openbabel/openbabel/2.3.2/openbabel-2.3.2.tar.gz</action>
+                <action type="shell_command">tar xfvz openbabel-2.3.2.tar.gz &amp;&amp; cd openbabel-2.3.2 &amp;&amp; cmake . -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR/openbabel/build &amp;&amp; make &amp;&amp; make install</action>
+
+                <!--compiling cuneiform 1.1.0 -->
+                <action type="shell_command">wget https://launchpad.net/cuneiform-linux/1.1/1.1/+download/cuneiform-linux-1.1.0.tar.bz2</action>
+                <action type="shell_command">tar xfvj cuneiform-linux-1.1.0.tar.bz2 &amp;&amp; cd cuneiform-linux-1.1.0 &amp;&amp; mkdir build &amp;&amp; cd build &amp;&amp; cmake .. -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR/cuneiform/build/ &amp;&amp; make &amp;&amp; make install</action>
+
+                <!--And finally OSRA 1.4.0-->
+                <!-- The OSRA tarball itself is fetched by the download_by_url action at the top, so it is not downloaded again here. -->
+                <action type="shell_command">wget http://share.gruenings.eu/osra-1.4.0-configure.patch</action>
+                <action type="shell_command">patch configure &lt; osra-1.4.0-configure.patch</action>
+                <!-- Extending the PATH is needed for OSRA. The cuneiform paths follow the CMAKE_INSTALL_PREFIX used above ($INSTALL_DIR/cuneiform/build). -->
+                <action type="shell_command">export PATH=$PATH:$INSTALL_DIR/gmagick/build/bin/ &amp;&amp; ./configure --with-tclap-include=$INSTALL_DIR/tclap/build/include/ --with-potrace-include=$INSTALL_DIR/potrace/build/include/ --with-potrace-lib=$INSTALL_DIR/potrace/build/lib/ --with-gocr-include=$INSTALL_DIR/gocr/build/include/gocr/ --with-gocr-lib=$INSTALL_DIR/gocr/build/lib/ --with-ocrad-include=$INSTALL_DIR/ocrad/build/include/ --with-ocrad-lib=$INSTALL_DIR/ocrad/build/lib/ --with-cuneiform-include=$INSTALL_DIR/cuneiform/build/include/ --with-cuneiform --with-cuneiform-lib=$INSTALL_DIR/cuneiform/build/lib/ --with-openbabel-include=$INSTALL_DIR/openbabel/build/include/openbabel-2.0/ --with-openbabel-lib=$INSTALL_DIR/openbabel/build/lib/ --with-graphicsmagick-lib=$INSTALL_DIR/gmagick/build/lib/ --with-graphicsmagick-include=$INSTALL_DIR/gmagick/build/include/GraphicsMagick/ --prefix=$INSTALL_DIR</action>
+                <action type="shell_command">make</action>
+                <action type="shell_command">make install</action>
+                <!-- Drop the helper library trees after the build. NOTE(review): if osra links Open Babel or cuneiform dynamically, deleting those two trees breaks it at runtime; confirm. -->
+                <action type="shell_command">rm -r $INSTALL_DIR/tclap/</action>
+                <action type="shell_command">rm -r $INSTALL_DIR/openbabel/</action>
+                <action type="shell_command">rm -r $INSTALL_DIR/gocr/</action>
+                <action type="shell_command">rm -r $INSTALL_DIR/ocrad/</action>
+                <action type="shell_command">rm -r $INSTALL_DIR/cuneiform/</action>
+
+                <action type="set_environment">
+                    <environment_variable name="LD_LIBRARY_PATH" action="prepend_to">$INSTALL_DIR/gmagick/build/lib/</environment_variable>
+                </action>
+                <action type="set_environment">
+                    <environment_variable name="LD_LIBRARY_PATH" action="prepend_to">$INSTALL_DIR/potrace/build/lib/</environment_variable>
+                </action>
+                <action type="set_environment">
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>We still have a handful of requirements</readme>
+    </package>
+</tool_dependency>