Mercurial > repos > bgruening > osra
changeset 0:efd9c309b8f7
Uploaded
author | bgruening |
---|---|
date | Tue, 26 Mar 2013 14:50:53 -0400 (2013-03-26) |
parents | |
children | 4a71d98176f6 |
files | osra.xml readme repository_dependencies.xml test_data/CID_2244.png test_data/CID_2244.sdf tool_dependencies.xml |
diffstat | 6 files changed, 425 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/osra.xml Tue Mar 26 14:50:53 2013 -0400
@@ -0,0 +1,47 @@
+<?xml version="1.0"?>
+<!--
+    Galaxy tool wrapper for OSRA. It runs `osra` on a single PNG or PDF dataset
+    and redirects the program's standard output into the result dataset. The
+    result is declared as sdf and re-typed to smi when the SMILES option is chosen.
+-->
+<tool id="osra" name="Molecule recognition" version="0.2">
+    <description>in Images or PDF documents</description>
+    <requirements>
+        <requirement type="package" version="1.4.0">osra</requirement>
+    </requirements>
+    <!-- Every substituted value is quoted so the shell passes it as one argument. -->
+    <command>
+        osra -f '$oformat' '$infile' > '$outfile'
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="png,pdf" label="Image or PDF with molecules"/>
+        <param name="oformat" type="select" label="Output molecule format">
+            <option value="can">SMILES</option>
+            <option value="sdf">SDF</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="outfile" type="data" format="sdf">
+            <change_format>
+                <when input="oformat" value="can" format="smi"/>
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <!-- NOTE(review): osra_on_CID2244.sdf is not among the files listed for this
+             changeset (only CID_2244.png and CID_2244.sdf); confirm it exists in test_data. -->
+        <test>
+            <param name="infile" ftype="png" value="CID_2244.png"/>
+            <param name="oformat" value="sdf"/>
+            <output name="outfile" ftype="sdf" file="osra_on_CID2244.sdf"/>
+        </test>
+    </tests>
+    <help>
+
+**What it does**
+
+OSRA detects molecules in images and converts them to standard molecule formats.
+
+
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/readme Tue Mar 26 14:50:53 2013 -0400
@@ -0,0 +1,165 @@
+OSRA: Optical Structure Recognition Application
+
+OSRA is a utility designed to convert graphical representations of chemical
+structures, as they appear in journal articles, patent documents, textbooks,
+trade magazines etc., into SMILES (Simplified Molecular Input Line Entry
+Specification - see http://en.wikipedia.org/wiki/SMILES) or
+SD files - a computer recognizable molecular structure format.
+OSRA can read a document in any of the over 90 graphical formats parseable by
+ImageMagick - including GIF, JPEG, PNG, TIFF, PDF, PS etc., and generate
+the SMILES or SDF representation of the molecular structure images encountered
+within that document.
+
+Note that any software designed for optical recognition is unlikely to be
+perfect, and the output produced might, and probably will, contain errors,
+so curation by a human knowledgeable in chemical structures is highly recommended.
+
+http://cactus.nci.nih.gov/osra/
+
+
+The wrapper comes with an automatic installation of all dependencies through the
+galaxy toolshed. If you do not want to use that feature you can compile OSRA with
+the following instructions.
+
+
+########################################
+#                                      #
+#              OSRA Setup              #
+# currently a small patch is required  #
+#                                      #
+########################################
+
+Save the following file to 'osra-1.4.0-configure.patch' and store it next
+to the script below.
+
+-------------------------------------------------------
+--- configure 2013-03-18 18:14:00.888605219 +0100
++++ configure_ori 2013-03-18 18:13:48.396605759 +0100
+@@ -4178,7 +4178,8 @@
+ 
+ # Check whether --with-tclap-include was given.
+ if test "${with_tclap_include+set}" = set; then :
+- withval=$with_tclap_include;
++ withval=$with_tclap_include;
++ CPPFLAGS="-I${withval} ${CPPFLAGS}"
+ else
+ with_tclap_include="auto"
+ fi
+-------------------------------------------------------
+
+#!/usr/bin/env bash
+# OSRA installation: every dependency is downloaded and built below the current directory ($cdir).
+
+cdir=`pwd`
+
+wget http://sourceforge.net/projects/graphicsmagick/files/graphicsmagick/1.3.18/GraphicsMagick-1.3.18.tar.gz
+tar xfvz GraphicsMagick-1.3.18.tar.gz
+cd GraphicsMagick-1.3.18
+./configure --prefix=`pwd`/build --enable-shared=yes
+make
+make install
+cd ..
+
+export PATH=$PATH:$cdir/GraphicsMagick-1.3.18/build/bin/
+
+wget http://potrace.sourceforge.net/download/potrace-1.11.tar.gz
+tar xfvz potrace-1.11.tar.gz
+cd potrace-1.11
+./configure --with-libpotrace --prefix=`pwd`/build
+make
+make install
+cd ..
+
+wget http://downloads.sourceforge.net/project/osra/gocr-patched/gocr-0.50pre-patched.tgz
+tar xfvz gocr-0.50pre-patched.tgz
+cd gocr-0.50pre-patched
+./configure --prefix=`pwd`/build
+make libs
+make all install
+cd ..
+
+wget http://downloads.sourceforge.net/project/tclap/tclap-1.2.1.tar.gz
+tar xfvz tclap-1.2.1.tar.gz
+cd tclap-1.2.1
+./configure --prefix=`pwd`/build
+make
+make install
+cd ..
+
+wget http://mirror.checkdomain.de/gnu/ocrad/ocrad-0.21.tar.gz
+tar xfvz ocrad-0.21.tar.gz
+cd ocrad-0.21
+./configure --prefix=`pwd`/build
+make
+make install
+cd ..
+
+wget http://downloads.sourceforge.net/project/openbabel/openbabel/2.3.2/openbabel-2.3.2.tar.gz
+tar xfvz openbabel-2.3.2.tar.gz
+cd openbabel-2.3.2
+mkdir build
+cd build
+cmake .. -DCMAKE_INSTALL_PREFIX=`pwd`/install/
+make
+make install
+cd ../..
+
+#wget http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.02.tar.gz
+#tar xfvz tesseract-ocr-3.02.02.tar.gz
+#cd tesseract-ocr
+#./configure --prefix=`pwd`/build
+#make
+#make install
+#cd ..
+
+
+wget https://launchpad.net/cuneiform-linux/1.1/1.1/+download/cuneiform-linux-1.1.0.tar.bz2
+tar xfvj cuneiform-linux-1.1.0.tar.bz2
+cd cuneiform-linux-1.1.0
+mkdir build
+cd build
+cmake .. -DCMAKE_INSTALL_PREFIX=`pwd`/install/
+make
+make install
+cd ../..
+
+
+wget http://downloads.sourceforge.net/project/osra/osra/1.4.0/osra-1.4.0.tgz
+tar xfvz osra-1.4.0.tgz
+patch osra-1.4.0/configure < osra-1.4.0-configure.patch
+cd osra-1.4.0
+
+./configure --with-tclap-include=$cdir/tclap-1.2.1/build/include/ \
+--with-potrace-include=$cdir/potrace-1.11/build/include/ \
+--with-potrace-lib=$cdir/potrace-1.11/build/lib/ \
+--with-gocr-include=$cdir/gocr-0.50pre-patched/build/include/gocr/ \
+--with-gocr-lib=$cdir/gocr-0.50pre-patched/build/lib/ \
+--with-ocrad-include=$cdir/ocrad-0.21/build/include/ \
+--with-ocrad-lib=$cdir/ocrad-0.21/build/lib/ \
+--with-cuneiform-include=$cdir/cuneiform-linux-1.1.0/build/install/include/ \
+--with-cuneiform \
+--with-cuneiform-lib=$cdir/cuneiform-linux-1.1.0/build/install/lib/ \
+--with-openbabel-include=$cdir/openbabel-2.3.2/build/install/include/openbabel-2.0/ \
+--with-openbabel-lib=$cdir/openbabel-2.3.2/build/install/lib/ \
+--with-graphicsmagick-lib=$cdir/GraphicsMagick-1.3.18/build/lib/ \
+--with-graphicsmagick-include=$cdir/GraphicsMagick-1.3.18/build/include/GraphicsMagick/ \
+--prefix=`pwd`/bin/
+#--enable-static-linking --disable-graphicsmagick-config
+make
+make install
+
+export LD_LIBRARY_PATH=$cdir/GraphicsMagick-1.3.18/build/lib/:$cdir/potrace-1.11/build/lib/:$cdir/openbabel-2.3.2/build/install/lib/:$cdir/cuneiform-linux-1.1.0/build/install/lib/${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
+./bin/bin/osra
+
+
+#--with-tesseract-include=$cdir/tclap-1.2.1/build/include/
+#--with-tesseract
+#--with-tesseract-lib
+
+
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml Tue Mar 26 14:50:53 2013 -0400
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<!-- Lists the one repository this repository depends on, pinned to a single changeset. -->
+<repositories description="This requires the Molecule datatype definitions (e.g. SMILES, InChI, SD-format).">
+    <repository
+        name="molecule_datatypes"
+        owner="bgruening"
+        toolshed="http://testtoolshed.g2.bx.psu.edu/"
+        changeset_revision="7cb4c02f61e6"/>
+</repositories>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/CID_2244.sdf Tue Mar 26 14:50:53 2013 -0400 @@ -0,0 +1,155 @@ +2244 + -OEChem-05151212332D + + 21 21 0 0 0 0 0 0 0999 V2000 + 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 5 1 0 0 0 0 + 1 12 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 21 1 0 0 0 0 + 3 11 2 0 0 0 0 + 4 12 2 0 0 0 0 + 5 6 1 0 0 0 0 + 5 7 2 0 0 0 0 + 6 8 2 0 0 0 0 + 6 11 1 0 0 0 0 + 7 9 1 0 0 0 0 + 7 14 1 0 0 0 0 + 8 10 1 0 0 0 0 + 8 15 1 0 0 0 0 + 9 10 2 0 0 0 0 + 9 16 1 0 0 0 0 + 10 17 1 0 0 0 0 + 12 13 1 0 0 0 0 + 13 18 1 0 0 0 0 + 13 19 1 0 0 0 0 + 13 20 1 0 0 0 0 +M END +> <PUBCHEM_COMPOUND_CID> +2244 + +> <PUBCHEM_COMPOUND_CANONICALIZED> +1 + +> <PUBCHEM_CACTVS_COMPLEXITY> +212 + +> <PUBCHEM_CACTVS_HBOND_ACCEPTOR> +4 + +> <PUBCHEM_CACTVS_HBOND_DONOR> +1 + +> <PUBCHEM_CACTVS_ROTATABLE_BOND> +3 + +> <PUBCHEM_CACTVS_SUBSKEYS> 
+AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA== + +> <PUBCHEM_IUPAC_OPENEYE_NAME> +2-acetoxybenzoic acid + +> <PUBCHEM_IUPAC_CAS_NAME> +2-acetyloxybenzoic acid + +> <PUBCHEM_IUPAC_NAME> +2-acetyloxybenzoic acid + +> <PUBCHEM_IUPAC_SYSTEMATIC_NAME> +2-acetyloxybenzoic acid + +> <PUBCHEM_IUPAC_TRADITIONAL_NAME> +2-acetoxybenzoic acid + +> <PUBCHEM_IUPAC_INCHI> +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) + +> <PUBCHEM_IUPAC_INCHIKEY> +BSYNRYMUTXBXSQ-UHFFFAOYSA-N + +> <PUBCHEM_XLOGP3> +1.2 + +> <PUBCHEM_EXACT_MASS> +180.042259 + +> <PUBCHEM_MOLECULAR_FORMULA> +C9H8O4 + +> <PUBCHEM_MOLECULAR_WEIGHT> +180.15742 + +> <PUBCHEM_OPENEYE_CAN_SMILES> +CC(=O)OC1=CC=CC=C1C(=O)O + +> <PUBCHEM_OPENEYE_ISO_SMILES> +CC(=O)OC1=CC=CC=C1C(=O)O + +> <PUBCHEM_CACTVS_TPSA> +63.6 + +> <PUBCHEM_MONOISOTOPIC_WEIGHT> +180.042259 + +> <PUBCHEM_TOTAL_CHARGE> +0 + +> <PUBCHEM_HEAVY_ATOM_COUNT> +13 + +> <PUBCHEM_ATOM_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_ATOM_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_ISOTOPIC_ATOM_COUNT> +0 + +> <PUBCHEM_COMPONENT_COUNT> +1 + +> <PUBCHEM_CACTVS_TAUTO_COUNT> +1 + +> <PUBCHEM_COORDINATE_TYPE> +1 +5 +255 + +> <PUBCHEM_BONDANNOTATIONS> +5 6 8 +5 7 8 +6 8 8 +7 9 8 +8 10 8 +9 10 8 + +$$$$ +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Tue Mar 26 14:50:53 2013 -0400
@@ -0,0 +1,73 @@
+<?xml version="1.0"?>
+<!--
+    Install recipe for the osra 1.4.0 package. The OSRA source archive is
+    downloaded first; each third-party library is then fetched and built into
+    its own directory below $INSTALL_DIR, OSRA is configured against those
+    directories, and LD_LIBRARY_PATH and PATH are extended at the end.
+-->
+<tool_dependency>
+    <package name="osra" version="1.4.0">
+        <install version="1.0">
+            <actions>
+                <action type="download_by_url">http://downloads.sourceforge.net/project/osra/osra/1.4.0/osra-1.4.0.tgz</action>
+                <!--compiling GraphicsMagick-1.3.18 -->
+                <action type="shell_command">wget http://sourceforge.net/projects/graphicsmagick/files/graphicsmagick/1.3.18/GraphicsMagick-1.3.18.tar.gz</action>
+                <action type="shell_command">tar xfvz GraphicsMagick-1.3.18.tar.gz &amp;&amp; cd GraphicsMagick-1.3.18 &amp;&amp; ./configure --prefix=$INSTALL_DIR/gmagick/build --enable-shared=yes &amp;&amp; make &amp;&amp; make install</action>
+
+                <!--compiling potrace-1.11 -->
+                <action type="shell_command">wget http://potrace.sourceforge.net/download/potrace-1.11.tar.gz</action>
+                <action type="shell_command">tar xfvz potrace-1.11.tar.gz &amp;&amp; cd potrace-1.11 &amp;&amp; ./configure --with-libpotrace --prefix=$INSTALL_DIR/potrace/build &amp;&amp; make &amp;&amp; make install</action>
+
+                <!--compiling gocr 0.50patched -->
+                <action type="shell_command">wget http://downloads.sourceforge.net/project/osra/gocr-patched/gocr-0.50pre-patched.tgz</action>
+                <action type="shell_command">tar xfvz gocr-0.50pre-patched.tgz &amp;&amp; cd gocr-0.50pre-patched &amp;&amp; ./configure --prefix=$INSTALL_DIR/gocr/build &amp;&amp; make libs &amp;&amp; make all install</action>
+
+                <!--compiling tclap 1.2.1 -->
+                <action type="shell_command">wget http://downloads.sourceforge.net/project/tclap/tclap-1.2.1.tar.gz</action>
+                <action type="shell_command">tar xfvz tclap-1.2.1.tar.gz &amp;&amp; cd tclap-1.2.1 &amp;&amp; ./configure --prefix=$INSTALL_DIR/tclap/build &amp;&amp; make &amp;&amp; make install</action>
+
+                <!--compiling ocrad 0.21 -->
+                <action type="shell_command">wget http://mirror.checkdomain.de/gnu/ocrad/ocrad-0.21.tar.gz</action>
+                <action type="shell_command">tar xfvz ocrad-0.21.tar.gz &amp;&amp; cd ocrad-0.21 &amp;&amp; ./configure --prefix=$INSTALL_DIR/ocrad/build &amp;&amp; make &amp;&amp; make install</action>
+
+                <!--compiling openbabel 2.3.2 -->
+                <action type="shell_command">wget http://downloads.sourceforge.net/project/openbabel/openbabel/2.3.2/openbabel-2.3.2.tar.gz</action>
+                <action type="shell_command">tar xfvz openbabel-2.3.2.tar.gz &amp;&amp; cd openbabel-2.3.2 &amp;&amp; cmake . -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR/openbabel/build &amp;&amp; make &amp;&amp; make install</action>
+
+                <!--compiling cuneiform 1.1.0 -->
+                <!-- The install prefix used here must match the cuneiform include and lib paths handed to the OSRA configure call. -->
+                <action type="shell_command">wget https://launchpad.net/cuneiform-linux/1.1/1.1/+download/cuneiform-linux-1.1.0.tar.bz2</action>
+                <action type="shell_command">tar xfvj cuneiform-linux-1.1.0.tar.bz2 &amp;&amp; cd cuneiform-linux-1.1.0 &amp;&amp; mkdir build &amp;&amp; cd build &amp;&amp; cmake .. -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR/cuneiform/build/ &amp;&amp; make &amp;&amp; make install</action>
+
+                <!--And finally OSRA 1.4.0-->
+                <!--<action type="download_by_url">http://downloads.sourceforge.net/project/osra/osra/1.4.0/osra-1.4.0.tgz</action>-->
+                <action type="shell_command">wget http://share.gruenings.eu/osra-1.4.0-configure.patch</action>
+                <action type="shell_command">patch configure &lt; osra-1.4.0-configure.patch</action>
+                <!-- Extending the PATH is needed for OSRA -->
+                <action type="shell_command">export PATH=$PATH:$INSTALL_DIR/gmagick/build/bin/ &amp;&amp; ./configure --with-tclap-include=$INSTALL_DIR/tclap/build/include/ --with-potrace-include=$INSTALL_DIR/potrace/build/include/ --with-potrace-lib=$INSTALL_DIR/potrace/build/lib/ --with-gocr-include=$INSTALL_DIR/gocr/build/include/gocr/ --with-gocr-lib=$INSTALL_DIR/gocr/build/lib/ --with-ocrad-include=$INSTALL_DIR/ocrad/build/include/ --with-ocrad-lib=$INSTALL_DIR/ocrad/build/lib/ --with-cuneiform-include=$INSTALL_DIR/cuneiform/build/include/ --with-cuneiform --with-cuneiform-lib=$INSTALL_DIR/cuneiform/build/lib/ --with-openbabel-include=$INSTALL_DIR/openbabel/build/include/openbabel-2.0/ --with-openbabel-lib=$INSTALL_DIR/openbabel/build/lib/ --with-graphicsmagick-lib=$INSTALL_DIR/gmagick/build/lib/ --with-graphicsmagick-include=$INSTALL_DIR/gmagick/build/include/GraphicsMagick/ --prefix=$INSTALL_DIR</action>
+                <action type="shell_command">make</action>
+                <action type="shell_command">make install</action>
+
+                <!-- Build-time dependency trees are deleted once OSRA is installed.
+                     NOTE(review): openbabel and cuneiform may be linked as shared libraries; only the gmagick and
+                     potrace lib directories are put on LD_LIBRARY_PATH, so confirm osra still starts afterwards. -->
+                <action type="shell_command">rm -r $INSTALL_DIR/tclap/</action>
+                <action type="shell_command">rm -r $INSTALL_DIR/openbabel/</action>
+                <action type="shell_command">rm -r $INSTALL_DIR/gocr/</action>
+                <action type="shell_command">rm -r $INSTALL_DIR/ocrad/</action>
+                <action type="shell_command">rm -r $INSTALL_DIR/cuneiform/</action>
+
+                <action type="set_environment">
+                    <environment_variable name="LD_LIBRARY_PATH" action="prepend_to">$INSTALL_DIR/gmagick/build/lib/</environment_variable>
+                </action>
+                <action type="set_environment">
+                    <environment_variable name="LD_LIBRARY_PATH" action="prepend_to">$INSTALL_DIR/potrace/build/lib/</environment_variable>
+                </action>
+                <action type="set_environment">
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>We still have a handful of requirements</readme>
+    </package>
+</tool_dependency>