view readme @ 4:3b50c1d0d1e5

Uploaded
author bgruening
date Thu, 25 Apr 2013 11:05:09 -0400
parents efd9c309b8f7
children edff5c162649
line wrap: on
line source

OSRA: Optical Structure Recognition Application

OSRA is a utility designed to convert graphical representations of chemical 
structures, as they appear in journal articles, patent documents, textbooks, 
trade magazines etc., into SMILES (Simplified Molecular Input Line Entry 
Specification - see http://en.wikipedia.org/wiki/SMILES) or 
SD files - a computer recognizable molecular structure format. 
OSRA can read a document in any of the over 90 graphical formats parseable by 
ImageMagick - including GIF, JPEG, PNG, TIFF, PDF, PS etc., and generate 
the SMILES or SDF representation of the molecular structure images encountered 
within that document.

Note that any software designed for optical recognition is unlikely to be 
perfect, and the output produced might, and probably will, contain errors, 
so curation by a human knowledgeable in chemical structures is highly recommended.

http://cactus.nci.nih.gov/osra/


The wrapper comes with an automatic installation of all dependencies through the
Galaxy Tool Shed. If you do not want to use that feature, you can compile OSRA
manually by following the instructions below.


########################################
#                                      #
#           OSRA Setup                 #
# currently a small patch is required  #
#                                      #
########################################

Save the following patch as 'osra-1.4.0-configure.patch' and store it in the
same directory as the installation script below.

-------------------------------------------------------
--- configure	2013-03-18 18:14:00.888605219 +0100
+++ configure_ori	2013-03-18 18:13:48.396605759 +0100
@@ -4178,7 +4178,8 @@
 
 # Check whether --with-tclap-include was given.
 if test "${with_tclap_include+set}" = set; then :
-  withval=$with_tclap_include;
+    withval=$with_tclap_include;
+    CPPFLAGS="-I${withval} ${CPPFLAGS}"
 else
   with_tclap_include="auto"
 fi
-------------------------------------------------------

#!/usr/bin/env bash
#
# OSRA installation
#
# Downloads and builds OSRA 1.4.0 together with its dependencies
# (GraphicsMagick, potrace, patched GOCR, TCLAP, ocrad, Open Babel and
# Cuneiform). Everything is unpacked and installed below the directory the
# script is started from; nothing is installed system-wide.
#
# Requirements: wget, tar, patch, make, cmake and a C/C++ toolchain, plus the
# file 'osra-1.4.0-configure.patch' in the directory the script is started from.

# Stop at the first failing step instead of continuing with a half-built
# dependency tree; treat unset variables as errors.
set -euo pipefail

# Root of the build tree. All later paths are derived from it so the script
# works from any location, including paths that contain whitespace.
cdir=$(pwd)

# --- GraphicsMagick -----------------------------------------------------------
wget http://sourceforge.net/projects/graphicsmagick/files/graphicsmagick/1.3.18/GraphicsMagick-1.3.18.tar.gz
tar -xzvf GraphicsMagick-1.3.18.tar.gz
cd GraphicsMagick-1.3.18
./configure --prefix="$(pwd)/build" --enable-shared=yes
make
make install
cd "$cdir"

# Make the freshly built GraphicsMagick tools available to the later steps.
export PATH="$PATH:$cdir/GraphicsMagick-1.3.18/build/bin/"

# --- potrace ------------------------------------------------------------------
wget http://potrace.sourceforge.net/download/potrace-1.11.tar.gz
tar -xzvf potrace-1.11.tar.gz
cd potrace-1.11
./configure --with-libpotrace --prefix="$(pwd)/build"
make
make install
cd "$cdir"

# --- GOCR (patched version distributed by the OSRA project) --------------------
wget http://downloads.sourceforge.net/project/osra/gocr-patched/gocr-0.50pre-patched.tgz
tar -xzvf gocr-0.50pre-patched.tgz
cd gocr-0.50pre-patched
./configure --prefix="$(pwd)/build"
make libs
make all install
cd "$cdir"

# --- TCLAP --------------------------------------------------------------------
wget http://downloads.sourceforge.net/project/tclap/tclap-1.2.1.tar.gz
tar -xzvf tclap-1.2.1.tar.gz
cd tclap-1.2.1
./configure --prefix="$(pwd)/build"
make
make install
cd "$cdir"

# --- ocrad --------------------------------------------------------------------
wget http://mirror.checkdomain.de/gnu/ocrad/ocrad-0.21.tar.gz
tar -xzvf ocrad-0.21.tar.gz
cd ocrad-0.21
./configure --prefix="$(pwd)/build"
make
make install
cd "$cdir"

# --- Open Babel (out-of-source CMake build) ------------------------------------
wget http://downloads.sourceforge.net/project/openbabel/openbabel/2.3.2/openbabel-2.3.2.tar.gz
tar -xzvf openbabel-2.3.2.tar.gz
cd openbabel-2.3.2
mkdir -p build   # -p: do not fail when the script is run a second time
cd build
cmake .. "-DCMAKE_INSTALL_PREFIX=$(pwd)/install/"
make
make install
cd "$cdir"

# --- Tesseract (disabled) -----------------------------------------------------
#wget http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.02.tar.gz
#tar xfvz tesseract-ocr-3.02.02.tar.gz
#cd tesseract-ocr
#./configure --prefix=`pwd`/build
#make
#make install
#cd ..

# --- Cuneiform (out-of-source CMake build) -------------------------------------
wget https://launchpad.net/cuneiform-linux/1.1/1.1/+download/cuneiform-linux-1.1.0.tar.bz2
tar -xjvf cuneiform-linux-1.1.0.tar.bz2
cd cuneiform-linux-1.1.0
mkdir -p build   # -p: do not fail when the script is run a second time
cd build
cmake .. "-DCMAKE_INSTALL_PREFIX=$(pwd)/install/"
make
make install
cd "$cdir"

# --- OSRA ---------------------------------------------------------------------
wget http://downloads.sourceforge.net/project/osra/osra/1.4.0/osra-1.4.0.tgz
tar -xzvf osra-1.4.0.tgz
# The patch makes configure add the --with-tclap-include directory to CPPFLAGS.
patch osra-1.4.0/configure < osra-1.4.0-configure.patch
cd osra-1.4.0

# Point configure at every dependency built above. The install prefix is
# '<osra source dir>/bin', so the executable ends up in ./bin/bin/osra.
./configure \
  --with-tclap-include="$cdir/tclap-1.2.1/build/include/" \
  --with-potrace-include="$cdir/potrace-1.11/build/include/" \
  --with-potrace-lib="$cdir/potrace-1.11/build/lib/" \
  --with-gocr-include="$cdir/gocr-0.50pre-patched/build/include/gocr/" \
  --with-gocr-lib="$cdir/gocr-0.50pre-patched/build/lib/" \
  --with-ocrad-include="$cdir/ocrad-0.21/build/include/" \
  --with-ocrad-lib="$cdir/ocrad-0.21/build/lib/" \
  --with-cuneiform-include="$cdir/cuneiform-linux-1.1.0/build/install/include/" \
  --with-cuneiform \
  --with-cuneiform-lib="$cdir/cuneiform-linux-1.1.0/build/install/lib/" \
  --with-openbabel-include="$cdir/openbabel-2.3.2/build/install/include/openbabel-2.0/" \
  --with-openbabel-lib="$cdir/openbabel-2.3.2/build/install/lib/" \
  --with-graphicsmagick-lib="$cdir/GraphicsMagick-1.3.18/build/lib/" \
  --with-graphicsmagick-include="$cdir/GraphicsMagick-1.3.18/build/include/GraphicsMagick/" \
  --prefix="$(pwd)/bin/"
#--enable-static-linking --disable-graphicsmagick-config
make
make install

# Let the dynamic loader find the privately installed libraries. The Open Babel
# and Cuneiform directories are the same ones handed to configure above
# (NOTE(review): presumably both are built as shared libraries - confirm).
# An already existing LD_LIBRARY_PATH is kept instead of being overwritten.
osra_lib_path="$cdir/GraphicsMagick-1.3.18/build/lib"
osra_lib_path="$osra_lib_path:$cdir/potrace-1.11/build/lib"
osra_lib_path="$osra_lib_path:$cdir/openbabel-2.3.2/build/install/lib"
osra_lib_path="$osra_lib_path:$cdir/cuneiform-linux-1.1.0/build/install/lib"
export LD_LIBRARY_PATH="${osra_lib_path}${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# Run the freshly built binary once with the library path set above.
./bin/bin/osra


# Disabled configure options for optional Tesseract support.
# NOTE(review): the include path below points at the TCLAP directory, which
# looks like a copy-and-paste leftover - fix it before enabling these options.
#--with-tesseract-include=$cdir/tclap-1.2.1/build/include/
#--with-tesseract
#--with-tesseract-lib