# HG changeset patch
# User bgruening
# Date 1364323853 14400
# Node ID efd9c309b8f7466d5bd910f526933772e77580cb
Uploaded
diff -r 000000000000 -r efd9c309b8f7 osra.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/osra.xml Tue Mar 26 14:50:53 2013 -0400
@@ -0,0 +1,38 @@
+
+ in Images or PDF documents
+
+ osra
+
+
+ osra -f $oformat $infile > $outfile
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+OSRA detects molecules in images and converts them to standard molecule formats.
+
+
+
+
diff -r 000000000000 -r efd9c309b8f7 readme
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/readme Tue Mar 26 14:50:53 2013 -0400
@@ -0,0 +1,165 @@
+OSRA: Optical Structure Recognition Application
+
+OSRA is a utility designed to convert graphical representations of chemical
+structures, as they appear in journal articles, patent documents, textbooks,
+trade magazines etc., into SMILES (Simplified Molecular Input Line Entry
+Specification - see http://en.wikipedia.org/wiki/SMILES) or
+SD files - a computer recognizable molecular structure format.
+OSRA can read a document in any of the over 90 graphical formats parseable by
+ImageMagick - including GIF, JPEG, PNG, TIFF, PDF, PS etc., and generate
+the SMILES or SDF representation of the molecular structure images encountered
+within that document.
+
+Note that any software designed for optical recognition is unlikely to be
+perfect, and the output produced might, and probably will, contain errors,
+so curation by a human knowledgeable in chemical structures is highly recommended.
+
+http://cactus.nci.nih.gov/osra/
+
+
+The wrapper comes with an automatic installation of all dependencies through the
+Galaxy Tool Shed. If you do not want to use that feature, you can compile OSRA
+manually with the following instructions.
+
+
+########################################
+# #
+# OSRA Setup #
+# currently a small patch is required #
+# #
+########################################
+
+Save the following patch as 'osra-1.4.0-configure.patch' and store it in the
+same directory as the installation script below.
+
+-------------------------------------------------------
+--- configure 2013-03-18 18:14:00.888605219 +0100
++++ configure_ori 2013-03-18 18:13:48.396605759 +0100
+@@ -4178,7 +4178,8 @@
+
+ # Check whether --with-tclap-include was given.
+ if test "${with_tclap_include+set}" = set; then :
+- withval=$with_tclap_include;
++ withval=$with_tclap_include;
++ CPPFLAGS="-I${withval} ${CPPFLAGS}"
+ else
+ with_tclap_include="auto"
+ fi
+-------------------------------------------------------
+
+#!/usr/bin/env bash
+# OSRA installation: downloads and builds OSRA 1.4.0 and its dependencies below the current directory.
+
+cdir=$(pwd)  # base directory; every dependency is unpacked and built below it
+
+wget http://sourceforge.net/projects/graphicsmagick/files/graphicsmagick/1.3.18/GraphicsMagick-1.3.18.tar.gz
+tar xfvz GraphicsMagick-1.3.18.tar.gz
+cd GraphicsMagick-1.3.18
+./configure --prefix="$(pwd)/build" --enable-shared=yes
+make
+make install
+cd ..
+# Put the GraphicsMagick tools built above on PATH (relative to $cdir, not a machine-specific /home path).
+export PATH="$PATH:$cdir/GraphicsMagick-1.3.18/build/bin/"
+
+wget http://potrace.sourceforge.net/download/potrace-1.11.tar.gz
+tar xfvz potrace-1.11.tar.gz
+cd potrace-1.11
+./configure --with-libpotrace --prefix="$(pwd)/build"
+make
+make install
+cd ..
+
+wget http://downloads.sourceforge.net/project/osra/gocr-patched/gocr-0.50pre-patched.tgz
+tar xfvz gocr-0.50pre-patched.tgz
+cd gocr-0.50pre-patched
+./configure --prefix="$(pwd)/build"
+make libs
+make all install
+cd ..
+
+wget http://downloads.sourceforge.net/project/tclap/tclap-1.2.1.tar.gz
+tar xfvz tclap-1.2.1.tar.gz
+cd tclap-1.2.1
+./configure --prefix="$(pwd)/build"
+make
+make install
+cd ..
+
+wget http://mirror.checkdomain.de/gnu/ocrad/ocrad-0.21.tar.gz
+tar xfvz ocrad-0.21.tar.gz
+cd ocrad-0.21
+./configure --prefix="$(pwd)/build"
+make
+make install
+cd ..
+
+wget http://downloads.sourceforge.net/project/openbabel/openbabel/2.3.2/openbabel-2.3.2.tar.gz
+tar xfvz openbabel-2.3.2.tar.gz
+cd openbabel-2.3.2
+mkdir build
+cd build
+cmake .. -DCMAKE_INSTALL_PREFIX="$(pwd)/install/"
+make
+make install
+cd ../..
+
+#wget http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.02.tar.gz
+#tar xfvz tesseract-ocr-3.02.02.tar.gz
+#cd tesseract-ocr
+#./configure --prefix="$(pwd)/build"
+#make
+#make install
+#cd ..
+
+
+wget https://launchpad.net/cuneiform-linux/1.1/1.1/+download/cuneiform-linux-1.1.0.tar.bz2
+tar xfvj cuneiform-linux-1.1.0.tar.bz2
+cd cuneiform-linux-1.1.0
+mkdir build
+cd build
+cmake .. -DCMAKE_INSTALL_PREFIX="$(pwd)/install/"
+make
+make install
+cd ../..
+
+
+wget http://downloads.sourceforge.net/project/osra/osra/1.4.0/osra-1.4.0.tgz
+tar xfvz osra-1.4.0.tgz
+patch osra-1.4.0/configure < osra-1.4.0-configure.patch
+cd osra-1.4.0
+
+./configure --with-tclap-include="$cdir/tclap-1.2.1/build/include/" \
+--with-potrace-include="$cdir/potrace-1.11/build/include/" \
+--with-potrace-lib="$cdir/potrace-1.11/build/lib/" \
+--with-gocr-include="$cdir/gocr-0.50pre-patched/build/include/gocr/" \
+--with-gocr-lib="$cdir/gocr-0.50pre-patched/build/lib/" \
+--with-ocrad-include="$cdir/ocrad-0.21/build/include/" \
+--with-ocrad-lib="$cdir/ocrad-0.21/build/lib/" \
+--with-cuneiform-include="$cdir/cuneiform-linux-1.1.0/build/install/include/" \
+--with-cuneiform \
+--with-cuneiform-lib="$cdir/cuneiform-linux-1.1.0/build/install/lib/" \
+--with-openbabel-include="$cdir/openbabel-2.3.2/build/install/include/openbabel-2.0/" \
+--with-openbabel-lib="$cdir/openbabel-2.3.2/build/install/lib/" \
+--with-graphicsmagick-lib="$cdir/GraphicsMagick-1.3.18/build/lib/" \
+--with-graphicsmagick-include="$cdir/GraphicsMagick-1.3.18/build/include/GraphicsMagick/" \
+--prefix="$(pwd)/bin/"
+#--enable-static-linking --disable-graphicsmagick-config
+make
+make install
+
+export LD_LIBRARY_PATH="$cdir/GraphicsMagick-1.3.18/build/lib/:$cdir/potrace-1.11/build/lib/"
+./bin/bin/osra
+
+
+#--with-tesseract-include=$cdir/tclap-1.2.1/build/include/
+#--with-tesseract
+#--with-tesseract-lib
+
+
+
+
+
+
+
+
diff -r 000000000000 -r efd9c309b8f7 repository_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml Tue Mar 26 14:50:53 2013 -0400
@@ -0,0 +1,4 @@
+
+
+
+
diff -r 000000000000 -r efd9c309b8f7 test_data/CID_2244.png
Binary file test_data/CID_2244.png has changed
diff -r 000000000000 -r efd9c309b8f7 test_data/CID_2244.sdf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test_data/CID_2244.sdf Tue Mar 26 14:50:53 2013 -0400
@@ -0,0 +1,155 @@
+2244
+ -OEChem-05151212332D
+
+ 21 21 0 0 0 0 0 0 0999 V2000
+ 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
+ 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
+ 1 5 1 0 0 0 0
+ 1 12 1 0 0 0 0
+ 2 11 1 0 0 0 0
+ 2 21 1 0 0 0 0
+ 3 11 2 0 0 0 0
+ 4 12 2 0 0 0 0
+ 5 6 1 0 0 0 0
+ 5 7 2 0 0 0 0
+ 6 8 2 0 0 0 0
+ 6 11 1 0 0 0 0
+ 7 9 1 0 0 0 0
+ 7 14 1 0 0 0 0
+ 8 10 1 0 0 0 0
+ 8 15 1 0 0 0 0
+ 9 10 2 0 0 0 0
+ 9 16 1 0 0 0 0
+ 10 17 1 0 0 0 0
+ 12 13 1 0 0 0 0
+ 13 18 1 0 0 0 0
+ 13 19 1 0 0 0 0
+ 13 20 1 0 0 0 0
+M END
+>
+2244
+
+>
+1
+
+>
+212
+
+>
+4
+
+>
+1
+
+>
+3
+
+>
+AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA==
+
+>
+2-acetoxybenzoic acid
+
+>
+2-acetyloxybenzoic acid
+
+>
+2-acetyloxybenzoic acid
+
+>
+2-acetyloxybenzoic acid
+
+>
+2-acetoxybenzoic acid
+
+>
+InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)
+
+>
+BSYNRYMUTXBXSQ-UHFFFAOYSA-N
+
+>
+1.2
+
+>
+180.042259
+
+>
+C9H8O4
+
+>
+180.15742
+
+>
+CC(=O)OC1=CC=CC=C1C(=O)O
+
+>
+CC(=O)OC1=CC=CC=C1C(=O)O
+
+>
+63.6
+
+>
+180.042259
+
+>
+0
+
+>
+13
+
+>
+0
+
+>
+0
+
+>
+0
+
+>
+0
+
+>
+0
+
+>
+1
+
+>
+1
+
+>
+1
+5
+255
+
+>
+5 6 8
+5 7 8
+6 8 8
+7 9 8
+8 10 8
+9 10 8
+
+$$$$
+
diff -r 000000000000 -r efd9c309b8f7 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Tue Mar 26 14:50:53 2013 -0400
@@ -0,0 +1,63 @@
+
+
+
+
+ http://downloads.sourceforge.net/project/osra/osra/1.4.0/osra-1.4.0.tgz
+
+ wget http://sourceforge.net/projects/graphicsmagick/files/graphicsmagick/1.3.18/GraphicsMagick-1.3.18.tar.gz
+ tar xfvz GraphicsMagick-1.3.18.tar.gz && cd GraphicsMagick-1.3.18 && ./configure --prefix=$INSTALL_DIR/gmagick/build --enable-shared=yes && make && make install
+
+
+ wget http://potrace.sourceforge.net/download/potrace-1.11.tar.gz
+ tar xfvz potrace-1.11.tar.gz && cd potrace-1.11 && ./configure --with-libpotrace --prefix=$INSTALL_DIR/potrace/build && make && make install
+
+
+ wget http://downloads.sourceforge.net/project/osra/gocr-patched/gocr-0.50pre-patched.tgz
+ tar xfvz gocr-0.50pre-patched.tgz && cd gocr-0.50pre-patched && ./configure --prefix=$INSTALL_DIR/gocr/build && make libs && make all install
+
+
+ wget http://downloads.sourceforge.net/project/tclap/tclap-1.2.1.tar.gz
+ tar xfvz tclap-1.2.1.tar.gz && cd tclap-1.2.1 && ./configure --prefix=$INSTALL_DIR/tclap/build && make && make install
+
+
+
+ wget http://mirror.checkdomain.de/gnu/ocrad/ocrad-0.21.tar.gz
+ tar xfvz ocrad-0.21.tar.gz && cd ocrad-0.21 && ./configure --prefix=$INSTALL_DIR/ocrad/build && make && make install
+
+
+ wget http://downloads.sourceforge.net/project/openbabel/openbabel/2.3.2/openbabel-2.3.2.tar.gz
+ tar xfvz openbabel-2.3.2.tar.gz && cd openbabel-2.3.2 && cmake . -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR/openbabel/build && make && make install
+
+
+ wget https://launchpad.net/cuneiform-linux/1.1/1.1/+download/cuneiform-linux-1.1.0.tar.bz2
+ tar xfvj cuneiform-linux-1.1.0.tar.bz2 && cd cuneiform-linux-1.1.0 && mkdir build && cd build && cmake .. -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR/cuneiform/build/ && make && make install
+
+
+
+ wget http://share.gruenings.eu/osra-1.4.0-configure.patch
+ patch configure < osra-1.4.0-configure.patch
+
+ export PATH=$PATH:$INSTALL_DIR/gmagick/build/bin/ && ./configure --with-tclap-include=$INSTALL_DIR/tclap/build/include/ --with-potrace-include=$INSTALL_DIR/potrace/build/include/ --with-potrace-lib=$INSTALL_DIR/potrace/build/lib/ --with-gocr-include=$INSTALL_DIR/gocr/build/include/gocr/ --with-gocr-lib=$INSTALL_DIR/gocr/build/lib/ --with-ocrad-include=$INSTALL_DIR/ocrad/build/include/ --with-ocrad-lib=$INSTALL_DIR/ocrad/build/lib/ --with-cuneiform-include=$INSTALL_DIR/cuneiform/build/install/include/ --with-cuneiform --with-cuneiform-lib=$INSTALL_DIR/cuneiform/build/install/lib/ --with-openbabel-include=$INSTALL_DIR/openbabel/build/include/openbabel-2.0/ --with-openbabel-lib=$INSTALL_DIR/openbabel/build/lib/ --with-graphicsmagick-lib=$INSTALL_DIR/gmagick/build/lib/ --with-graphicsmagick-include=$INSTALL_DIR/gmagick/build/include/GraphicsMagick/ --prefix=$INSTALL_DIR
+ make
+ make install
+
+ rm $INSTALL_DIR/tclap/ -r
+ rm $INSTALL_DIR/openbabel/ -r
+ rm $INSTALL_DIR/gocr/ -r
+ rm $INSTALL_DIR/ocrad/ -r
+ rm $INSTALL_DIR/cuneiform/ -r
+
+
+ $INSTALL_DIR/gmagick/build/lib/
+
+
+ $INSTALL_DIR/potrace/build/lib/
+
+
+ $INSTALL_DIR/bin
+
+
+
+ We still have a handful of requirements
+
+