view readme @ 0:efd9c309b8f7

Uploaded
author bgruening
date Tue, 26 Mar 2013 14:50:53 -0400
parents
children edff5c162649
line wrap: on
line source

OSRA: Optical Structure Recognition Application

OSRA is a utility designed to convert graphical representations of chemical 
structures, as they appear in journal articles, patent documents, textbooks, 
trade magazines etc., into SMILES (Simplified Molecular Input Line Entry 
Specification - see http://en.wikipedia.org/wiki/SMILES) or 
SD files - a computer recognizable molecular structure format. 
OSRA can read a document in any of the over 90 graphical formats parseable by 
ImageMagick - including GIF, JPEG, PNG, TIFF, PDF, PS etc., and generate 
the SMILES or SDF representation of the molecular structure images encountered 
within that document.

Note that any software designed for optical recognition is unlikely to be 
perfect, and the output produced might, and probably will, contain errors, 
so curation by a human knowledgeable in chemical structures is highly recommended.

http://cactus.nci.nih.gov/osra/


The wrapper comes with an automatic installation of all dependencies through the
Galaxy Tool Shed. If you do not want to use that feature, you can compile OSRA
yourself with the following instructions.


########################################
#                                      #
#           OSRA Setup                 #
# currently a small patch is required  #
#                                      #
########################################

Save the following patch as 'osra-1.4.0-configure.patch' and store it next 
to the script below, in the directory from which you run the script.

-------------------------------------------------------
--- configure	2013-03-18 18:14:00.888605219 +0100
+++ configure_ori	2013-03-18 18:13:48.396605759 +0100
@@ -4178,7 +4178,8 @@
 
 # Check whether --with-tclap-include was given.
 if test "${with_tclap_include+set}" = set; then :
-  withval=$with_tclap_include;
+    withval=$with_tclap_include;
+    CPPFLAGS="-I${withval} ${CPPFLAGS}"
 else
   with_tclap_include="auto"
 fi
-------------------------------------------------------

#!/usr/bin/env bash
#osra installation
#
# Downloads and builds OSRA together with its dependencies below the directory
# the script is started from. Each package is installed into a prefix inside
# its own source tree. The patch file 'osra-1.4.0-configure.patch' is read via
# a relative path, so it must be in the directory the script is run from.

# Directory the script was started from; later paths are derived from it.
cdir=$(pwd)

# --- GraphicsMagick 1.3.18 (shared library build) ---
wget http://sourceforge.net/projects/graphicsmagick/files/graphicsmagick/1.3.18/GraphicsMagick-1.3.18.tar.gz || exit 1
tar xfvz GraphicsMagick-1.3.18.tar.gz || exit 1
(
  # Run in a subshell so the caller's working directory is untouched even if
  # a step fails; the && chain stops at the first failing step.
  cd GraphicsMagick-1.3.18 &&
    ./configure --prefix="$(pwd)/build" --enable-shared=yes &&
    make &&
    make install
) || exit 1

# Make the freshly installed GraphicsMagick binaries available to later steps.
# The path is derived from $cdir rather than a machine-specific home directory.
export PATH="$PATH:$cdir/GraphicsMagick-1.3.18/build/bin/"

# Build the remaining third-party packages. Every package follows the same
# pattern: download, unpack, then build inside a subshell. The subshell keeps
# the working directory of the main script fixed, and the && chain plus
# '|| exit 1' stops the installation at the first failing step instead of
# continuing in the wrong directory.

# --- potrace 1.11 (with libpotrace) ---
wget http://potrace.sourceforge.net/download/potrace-1.11.tar.gz || exit 1
tar xfvz potrace-1.11.tar.gz || exit 1
(
  cd potrace-1.11 &&
    ./configure --with-libpotrace --prefix="$(pwd)/build" &&
    make &&
    make install
) || exit 1

# --- gocr 0.50pre (patched tarball from the OSRA project) ---
wget http://downloads.sourceforge.net/project/osra/gocr-patched/gocr-0.50pre-patched.tgz || exit 1
tar xfvz gocr-0.50pre-patched.tgz || exit 1
(
  cd gocr-0.50pre-patched &&
    ./configure --prefix="$(pwd)/build" &&
    make libs &&
    make all install
) || exit 1

# --- tclap 1.2.1 ---
wget http://downloads.sourceforge.net/project/tclap/tclap-1.2.1.tar.gz || exit 1
tar xfvz tclap-1.2.1.tar.gz || exit 1
(
  cd tclap-1.2.1 &&
    ./configure --prefix="$(pwd)/build" &&
    make &&
    make install
) || exit 1

# --- ocrad 0.21 ---
wget http://mirror.checkdomain.de/gnu/ocrad/ocrad-0.21.tar.gz || exit 1
tar xfvz ocrad-0.21.tar.gz || exit 1
(
  cd ocrad-0.21 &&
    ./configure --prefix="$(pwd)/build" &&
    make &&
    make install
) || exit 1

# --- Open Babel 2.3.2 (out-of-tree cmake build in ./build) ---
wget http://downloads.sourceforge.net/project/openbabel/openbabel/2.3.2/openbabel-2.3.2.tar.gz || exit 1
tar xfvz openbabel-2.3.2.tar.gz || exit 1
(
  # mkdir -p: do not fail when the build directory is left over from an
  # earlier run.
  cd openbabel-2.3.2 &&
    mkdir -p build &&
    cd build &&
    cmake .. -DCMAKE_INSTALL_PREFIX="$(pwd)/install/" &&
    make &&
    make install
) || exit 1

# tesseract is currently disabled; kept for reference.
#wget http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.02.tar.gz
#tar xfvz tesseract-ocr-3.02.02.tar.gz
#cd tesseract-ocr
#./configure --prefix=`pwd`/build
#make
#make install
#cd ..


# --- cuneiform 1.1.0 (out-of-tree cmake build in ./build) ---
wget https://launchpad.net/cuneiform-linux/1.1/1.1/+download/cuneiform-linux-1.1.0.tar.bz2 || exit 1
tar xfvj cuneiform-linux-1.1.0.tar.bz2 || exit 1
(
  cd cuneiform-linux-1.1.0 &&
    mkdir -p build &&
    cd build &&
    cmake .. -DCMAKE_INSTALL_PREFIX="$(pwd)/install/" &&
    make &&
    make install
) || exit 1


# --- OSRA 1.4.0 ---
# Fetch the sources, patch configure, and build against the packages that were
# installed below $cdir by the preceding steps.
wget http://downloads.sourceforge.net/project/osra/osra/1.4.0/osra-1.4.0.tgz || exit 1
tar xfvz osra-1.4.0.tgz || exit 1
# The patch makes configure prepend the tclap include directory to CPPFLAGS.
# Abort if it does not apply rather than configuring an unpatched tree.
patch osra-1.4.0/configure < osra-1.4.0-configure.patch || exit 1
cd osra-1.4.0 || exit 1

# All dependency locations are quoted so that a $cdir containing whitespace
# does not split the arguments.
./configure --with-tclap-include="$cdir/tclap-1.2.1/build/include/" \
  --with-potrace-include="$cdir/potrace-1.11/build/include/" \
  --with-potrace-lib="$cdir/potrace-1.11/build/lib/" \
  --with-gocr-include="$cdir/gocr-0.50pre-patched/build/include/gocr/" \
  --with-gocr-lib="$cdir/gocr-0.50pre-patched/build/lib/" \
  --with-ocrad-include="$cdir/ocrad-0.21/build/include/" \
  --with-ocrad-lib="$cdir/ocrad-0.21/build/lib/" \
  --with-cuneiform-include="$cdir/cuneiform-linux-1.1.0/build/install/include/" \
  --with-cuneiform \
  --with-cuneiform-lib="$cdir/cuneiform-linux-1.1.0/build/install/lib/" \
  --with-openbabel-include="$cdir/openbabel-2.3.2/build/install/include/openbabel-2.0/" \
  --with-openbabel-lib="$cdir/openbabel-2.3.2/build/install/lib/" \
  --with-graphicsmagick-lib="$cdir/GraphicsMagick-1.3.18/build/lib/" \
  --with-graphicsmagick-include="$cdir/GraphicsMagick-1.3.18/build/include/GraphicsMagick/" \
  --prefix="$(pwd)/bin/" || exit 1
#--enable-static-linking --disable-graphicsmagick-config
make || exit 1
make install || exit 1

# Runtime search path for the privately installed shared libraries. The
# GraphicsMagick directory is listed once (it used to appear twice).
# NOTE(review): the Open Babel and cuneiform lib directories are added on the
# assumption that osra links against them dynamically -- confirm with
# 'ldd ./bin/bin/osra'. Extra entries are harmless if it does not.
osra_lib_path="$cdir/GraphicsMagick-1.3.18/build/lib/:$cdir/potrace-1.11/build/lib/"
osra_lib_path="$osra_lib_path:$cdir/openbabel-2.3.2/build/install/lib/"
osra_lib_path="$osra_lib_path:$cdir/cuneiform-linux-1.1.0/build/install/lib/"
# Keep any search path the caller already had instead of discarding it.
export LD_LIBRARY_PATH="$osra_lib_path${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# Smoke test: start the installed binary (prefix is ./bin/, so it lives in
# ./bin/bin/).
./bin/bin/osra


# tesseract support is currently disabled; configure flags kept for reference.
#--with-tesseract-include=$cdir/tclap-1.2.1/build/include/
#--with-tesseract
#--with-tesseract-lib