Mercurial > repos > bgruening > confab
changeset 22:70b28a917515 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/osra commit a44c0a13283e873a740eabcad04f021208290dfe-dirty
author | bgruening |
---|---|
date | Sun, 01 Nov 2015 10:31:25 -0500 |
parents | ff23947232d0 |
children | d323837e346d |
files | confab.tar.bz2 confab.xml osra.py osra.tar.bz2 osra.xml readme test-data/CID_3033.sdf test-data/confab_on_CID3033.sdf test_data/2008001635_153_chem.png test_data/2008001635_153_chem.smi test_data/CID_2244.png test_data/CID_2244.sdf tool_dependencies.xml |
diffstat | 13 files changed, 335 insertions(+), 693 deletions(-) [+] |
line wrap: on
line diff
--- a/confab.xml Sun Nov 01 10:29:42 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,142 +0,0 @@ -<tool id="ctb_confab" name="Conformer calculation" version="0.1"> - <description>for molecules (confab)</description> - <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="100" shared_inputs="" merge_outputs="outfile"></parallelism> - <requirements> - <requirement type="package" version="1.0.1">confab</requirement> - </requirements> - <command> -<![CDATA[ - confab - -i ${infile.ext} - -o sdf - -r $RMSD - -e $energy - -c $conformers - $first_conformer - $infile - $outfile 2>&1 -]]> - </command> - <inputs> - <param name="infile" type="data" format="sdf,mol2" label="Input molecule with 3D coordinates" help="Dataset missing? See TIP below"/> - <param name='RMSD' type='float' value='0.5' label='RMSD cutoff (in Angstrom)' /> - <param name='energy' type='float' value='50.0' label='Energy cutoff (in kcal/mol)' /> - <param name='conformers' type='integer' value='100' label='Max number of conformers to test' /> - <param name='first_conformer' type='boolean' truevalue='-a' falsevalue='' label='Include the input conformation as the first conformer' /> - </inputs> - <outputs> - <data format="sdf" name="outfile" label="${tool.name} on ${on_string}"/> - </outputs> - <tests> - <test> - <param name="infile" value='CID_3033.sdf' ftype='sdf' /> - <param name='RMSD' value='0.5' /> - <param name='energy' value='50.0' /> - <param name='conformers' value='100000' /> - <param name='first_conformer' value='-a' /> - <output name="outfile" file='confab_on_CID_3033.sdf' ftype='sdf' /> - </test> - </tests> -<help> -<![CDATA[ - -.. class:: infomark - -**What this tool does** - -Confab_ is a conformation generator. The algorithm starts with an input 3D structure which, after some initialisation steps, is used to generate multiple conformers which are filtered on-the-fly to identify diverse low energy conformers. - -.. 
_Confab: https://code.google.com/p/confab/ - ------ - -.. class:: infomark - -**Input** - -* Example:: - - 21.2060 9.9350 63.0810 C 0 0 0 0 0 0 0 0 0 0 0 0 - 21.2410 9.4460 64.5510 C 0 0 0 0 0 0 0 0 0 0 0 0 - 22.0000 8.1250 64.6300 C 0 0 0 0 0 0 0 0 0 0 0 0 - 21.7010 7.3010 65.5120 O 0 0 0 0 0 0 0 0 0 0 0 0 - 23.1180 7.8720 63.7340 C 0 0 0 0 0 0 0 0 0 0 0 0 - 23.4530 8.7270 62.7850 C 0 0 0 0 0 0 0 0 0 0 0 0 - 24.6970 8.4430 61.9510 C 0 0 0 0 0 0 0 0 0 0 0 0 - - ....... - - 1 2 1 0 0 0 0 - 1 11 1 0 0 0 0 - 2 3 1 0 0 0 0 - 3 4 2 0 0 0 0 - 3 5 1 0 0 0 0 - 5 6 2 0 0 0 0 - 6 7 1 0 0 0 0 - - RMSD cutoff (in Angstrom) 0.5 - Energy cutoff (in kcal/mol) 50.0 - Max number of conformers to test 100000 - Include the input conformation as the first conformer False - ------ - -.. class:: infomark - -**Output** - -* Example:: - - 23 26 0 0 0 0 0 0 0 0999 V2000 - 21.2060 9.9350 63.0810 C 0 0 0 0 0 0 0 0 0 0 0 0 - 21.2410 9.4460 64.5510 C 0 0 0 0 0 0 0 0 0 0 0 0 - 22.0000 8.1250 64.6300 C 0 0 0 0 0 0 0 0 0 0 0 0 - 21.7010 7.3010 65.5120 O 0 0 0 0 0 0 0 0 0 0 0 0 - 23.1180 7.8720 63.7340 C 0 0 0 0 0 0 0 0 0 0 0 0 - 23.4530 8.7270 62.7850 C 0 0 0 0 0 0 0 0 0 0 0 0 - 24.6970 8.4430 61.9510 C 0 0 0 0 0 0 0 0 0 0 0 0 - 24.4490 8.6370 60.4430 C 0 0 0 0 0 0 0 0 0 0 0 0 - 23.7890 9.9970 60.0980 C 0 0 2 0 0 0 0 0 0 0 0 0 - 22.4340 10.0950 60.8720 C 0 0 1 0 0 0 0 0 0 0 0 0 - 22.6140 10.0230 62.4340 C 0 0 1 0 0 0 0 0 0 0 0 0 - 21.6330 11.3540 60.4500 C 0 0 0 0 0 0 0 0 0 0 0 0 - 21.4320 11.4340 58.9110 C 0 0 0 0 0 0 0 0 0 0 0 0 - 22.7860 11.4040 58.1690 C 0 0 1 0 0 0 0 0 0 0 0 0 - 23.4830 10.0600 58.5980 C 0 0 1 0 0 0 0 0 0 0 0 0 - 24.6740 9.9180 57.6180 C 0 0 0 0 0 0 0 0 0 0 0 0 - 24.0720 10.4500 56.2670 C 0 0 0 0 0 0 0 0 0 0 0 0 - 22.7140 11.1490 56.6270 C 0 0 2 0 0 0 0 0 0 0 0 0 - 23.6590 12.6770 58.4540 C 0 0 0 0 0 0 0 0 0 0 0 0 - 23.4270 11.2460 63.0070 C 0 0 0 0 0 0 0 0 0 0 0 0 - 22.3750 12.3880 55.7810 C 0 0 0 0 0 0 0 0 0 0 0 0 - 23.2120 12.8760 55.0520 O 0 0 0 0 0 0 0 0 0 0 0 0 - 
21.0090 12.9760 55.8570 C 0 0 0 0 0 0 0 0 0 0 0 0 - -:: - - 1 2 1 0 0 0 0 - 1 11 1 0 0 0 0 - 2 3 1 0 0 0 0 - 3 4 2 0 0 0 0 - 3 5 1 0 0 0 0 - 5 6 2 0 0 0 0 - 6 7 1 0 0 0 0 - 6 11 1 0 0 0 0 - 7 8 1 0 0 0 0 - 9 8 1 6 0 0 0 - 10 9 1 1 0 0 0 - ------ - -.. class:: infomark - -**Cite** - -Confab_ - -.. _Confab: https://code.google.com/p/confab/ - - -]]> - </help> -</tool>
#!/usr/bin/env python
# File: osra.py
"""
Launcher for the ``osra`` executable, called by the Galaxy OSRA tool wrapper.

OSRA_DATA_FILES is set during the Tool Shed installation.  If it is not set
(or the dictionaries it should point to are missing), fall back to the
standard configuration of OSRA by dropping the ``-l <spelling.txt>`` and
``-a <superatom.txt>`` arguments (argv positions 4-7).

This script is a hack: the content of OSRA_DATA_FILES is not known at XML
evaluation time, so the decision has to be made here, at run time.

Expected invocation (built by osra.xml)::

    osra.py -f $oformat $infile
        -l $OSRA_DATA_FILES/spelling.txt -a $OSRA_DATA_FILES/superatom.txt
        [further OSRA options] > $outfile
"""

import os
import subprocess
import sys

# Fixed positions of the optional dictionary arguments in the command line
# generated by the tool XML: argv[4] == '-l', argv[5] == spelling.txt path,
# argv[6] == '-a', argv[7] == superatom.txt path.
SPELLING_FLAG_INDEX = 4
SPELLING_PATH_INDEX = 5
SUPERATOM_FLAG_INDEX = 6
SUPERATOM_PATH_INDEX = 7


def build_command(argv):
    """
    Return the ``osra`` command line derived from the wrapper's *argv*.

    *argv* is left untouched; a new list is returned whose first element is
    ``'osra'``.  When *argv* carries ``-l``/``-a`` at the expected positions
    and at least one of the two referenced files does not exist, those four
    arguments are removed so that OSRA uses its built-in defaults.  Any other
    argument layout (including a shorter command line) is passed through
    unchanged apart from the program name.
    """
    command = list(argv)

    has_dictionary_args = (
        len(command) > SUPERATOM_PATH_INDEX
        and command[SPELLING_FLAG_INDEX] == '-l'
        and command[SUPERATOM_FLAG_INDEX] == '-a'
    )
    if has_dictionary_args:
        dictionaries_present = (
            os.path.exists(command[SPELLING_PATH_INDEX])
            and os.path.exists(command[SUPERATOM_PATH_INDEX])
        )
        if not dictionaries_present:
            # OSRA_DATA_FILES is not set, or a dictionary file is missing:
            # drop '-l <spelling.txt> -a <superatom.txt>' in one slice.
            del command[SPELLING_FLAG_INDEX:SUPERATOM_PATH_INDEX + 1]

    if command:
        command[0] = 'osra'
    else:
        command = ['osra']
    return command


def main():
    """Run OSRA with the adjusted arguments and propagate its exit status."""
    sys.exit(subprocess.call(build_command(sys.argv), stdout=sys.stdout))


if __name__ == '__main__':
    main()
<!-- File: osra.xml
     Galaxy tool definition for OSRA (Optical Structure Recognition
     Application): recognises molecules in PNG images or PDF documents and
     writes them as SMILES or SDF. -->
<tool id="ctb_osra" name="Molecule recognition" version="0.3">
    <description>in Images or PDF documents (OSRA)</description>
    <requirements>
        <requirement type="package" version="2.0.0">osra</requirement>
        <requirement type="package" version="2.3.2">openbabel</requirement>
        <requirement type="package" version="1.3.18">graphicsmagick</requirement>
    </requirements>
    <!-- The command is routed through osra.py, which inspects the argument
         positions: "-l" and "-a" with their file paths must stay directly
         after $infile (arguments 4 to 7), because the wrapper may strip
         exactly those four arguments. -->
    <command interpreter='python'>
<![CDATA[
    ## OSRA_DATA_FILES is set during the Tool Shed installation;
    ## if it is not set, use the standard configuration and hope for the best
    osra.py -f $oformat $infile
        -l \$OSRA_DATA_FILES/spelling.txt -a \$OSRA_DATA_FILES/superatom.txt

    ## further additions of OSRA parameters should go after -l and -a
    ## because -l and -a can be removed by the python wrapper

    $confidence
    $adaptive
    $thinning

    > $outfile
]]>
    </command>
    <inputs>
        <param name="infile" type="data" format="png,pdf" label="Image or PDF with molecules"/>
        <param name="oformat" type="select" label="Output molecule format">
            <option value="can">SMILES</option>
            <option value="sdf">SDF</option>
        </param>
        <param name="confidence" type="boolean" label="Print out confidence estimate (-p)" truevalue="-p" falsevalue="" checked="true" />
        <param name="adaptive" type="boolean" label="Adaptive thresholding pre-processing, useful for low light/low contrast images (-i)" truevalue="-i" falsevalue="" checked="false" />
        <param name="thinning" type="boolean" label="Additional thinning/scaling down of low quality documents (-j)" truevalue="-j" falsevalue="" checked="false" />

    </inputs>
    <outputs>
        <!-- Default output datatype is SDF; it switches to SMILES (smi) when
             the user selects the "can" output format. -->
        <data name="outfile" type="data" format="sdf">
            <change_format>
                <when input="oformat" value="can" format="smi"/>
            </change_format>
        </data>
    </outputs>
    <tests>
        <test>
            <param name="infile" ftype="png" value="CID_2244.png"/>
            <param name="oformat" value="sdf"/>
            <!-- NOTE(review): the changeset file list contains
                 test_data/CID_2244.sdf but no osra_on_CID2244.sdf; confirm
                 that this expected-output file exists. -->
            <output name="outfile" ftype="sdf" file="osra_on_CID2244.sdf"/>
        </test>
        <test>
            <param name="infile" ftype="png" value="2008001635_153_chem.png"/>
            <param name="oformat" value="can"/>
            <!-- oformat "can" changes the output datatype to smi (see the
                 change_format rule in the outputs section). -->
            <output name="outfile" ftype="smi" file="2008001635_153_chem.smi"/>
        </test>

    </tests>
    <help>
<![CDATA[

.. class:: infomark

**What this tool does**

OSRA_ (Optical Structure Recognition Application) is a utility designed to convert graphical representations of chemical structures into SMILES or SDF. It generates the SMILES or SDF representation of any molecular structure image within a document which is parseable by GraphicsMagick.

.. _OSRA: http://cactus.nci.nih.gov/osra/

-----

.. class:: infomark

**Cite**

Igor V Filippov and Marc C Nicklaus - `Optical Structure Recognition Software To Recover Chemical Information: OSRA, An Open Source Solution`_

.. _`Optical Structure Recognition Software To Recover Chemical Information: OSRA, An Open Source Solution`: http://pubs.acs.org/doi/abs/10.1021/ci800067r
]]>
    </help>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme Sun Nov 01 10:31:25 2015 -0500 @@ -0,0 +1,20 @@ +OSRA: Optical Structure Recognition Application + +OSRA is a utility designed to convert graphical representations of chemical +structures, as they appear in journal articles, patent documents, textbooks, +trade magazines etc., into SMILES (Simplified Molecular Input Line Entry +Specification - see http://en.wikipedia.org/wiki/SMILES) or +SD files - a computer recognizable molecular structure format. +OSRA can read a document in any of the over 90 graphical formats parseable by +ImageMagick - including GIF, JPEG, PNG, TIFF, PDF, PS etc., and generate +the SMILES or SDF representation of the molecular structure images encountered +within that document. + +Note that any software designed for optical recognition is unlikely to be +perfect, and the output produced might, and probably will, contain errors, +so curation by a human knowledgeable in chemical structures is highly recommended. + +http://cactus.nci.nih.gov/osra/ + +The wrapper comes with an automatic installation of all dependencies through the +galaxy toolshed.
--- a/test-data/CID_3033.sdf Sun Nov 01 10:29:42 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,271 +0,0 @@ -3033 - -OEChem-08231107463D - - 30 31 0 0 0 0 0 0 0999 V2000 - 1.9541 1.1500 -2.5078 Cl 0 0 0 0 0 0 0 0 0 0 0 0 - 1.1377 -1.6392 2.1136 Cl 0 0 0 0 0 0 0 0 0 0 0 0 - -3.2620 -2.9284 -1.0647 O 0 0 0 0 0 0 0 0 0 0 0 0 - -2.7906 -1.9108 0.9092 O 0 0 0 0 0 0 0 0 0 0 0 0 - 0.2679 -0.2051 -0.3990 N 0 0 0 0 0 0 0 0 0 0 0 0 - -2.0640 0.5139 -0.3769 C 0 0 0 0 0 0 0 0 0 0 0 0 - -0.7313 0.7178 -0.0192 C 0 0 0 0 0 0 0 0 0 0 0 0 - -2.4761 -0.6830 -1.1703 C 0 0 0 0 0 0 0 0 0 0 0 0 - 1.6571 -0.2482 -0.1795 C 0 0 0 0 0 0 0 0 0 0 0 0 - -3.0382 1.4350 0.0081 C 0 0 0 0 0 0 0 0 0 0 0 0 - -0.3728 1.8429 0.7234 C 0 0 0 0 0 0 0 0 0 0 0 0 - -2.6797 2.5600 0.7506 C 0 0 0 0 0 0 0 0 0 0 0 0 - -1.3470 2.7640 1.1083 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.5353 0.3477 -1.0918 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.1740 -0.8865 0.9534 C 0 0 0 0 0 0 0 0 0 0 0 0 - -2.8480 -1.8749 -0.3123 C 0 0 0 0 0 0 0 0 0 0 0 0 - 3.9124 0.3058 -0.8739 C 0 0 0 0 0 0 0 0 0 0 0 0 - 3.5511 -0.9285 1.1713 C 0 0 0 0 0 0 0 0 0 0 0 0 - 4.4203 -0.3324 0.2576 C 0 0 0 0 0 0 0 0 0 0 0 0 - -1.7086 -0.9792 -1.8930 H 0 0 0 0 0 0 0 0 0 0 0 0 - -3.3614 -0.4266 -1.7676 H 0 0 0 0 0 0 0 0 0 0 0 0 - -0.0861 -1.1146 -0.6780 H 0 0 0 0 0 0 0 0 0 0 0 0 - -4.0812 1.2885 -0.2604 H 0 0 0 0 0 0 0 0 0 0 0 0 - 0.6569 2.0278 1.0167 H 0 0 0 0 0 0 0 0 0 0 0 0 - -3.4382 3.2769 1.0511 H 0 0 0 0 0 0 0 0 0 0 0 0 - -1.0683 3.6399 1.6868 H 0 0 0 0 0 0 0 0 0 0 0 0 - 4.6037 0.7654 -1.5758 H 0 0 0 0 0 0 0 0 0 0 0 0 - 3.9635 -1.4215 2.0480 H 0 0 0 0 0 0 0 0 0 0 0 0 - 5.4925 -0.3651 0.4274 H 0 0 0 0 0 0 0 0 0 0 0 0 - -3.5025 -3.7011 -0.5102 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1 14 1 0 0 0 0 - 2 15 1 0 0 0 0 - 3 16 1 0 0 0 0 - 3 30 1 0 0 0 0 - 4 16 2 0 0 0 0 - 5 7 1 0 0 0 0 - 5 9 1 0 0 0 0 - 5 22 1 0 0 0 0 - 6 7 1 0 0 0 0 - 6 8 1 0 0 0 0 - 6 10 2 0 0 0 0 - 7 11 2 0 0 0 0 - 8 16 1 0 0 0 0 - 8 20 1 0 0 0 0 - 8 21 1 0 0 0 0 - 9 14 2 0 0 0 0 - 9 15 1 0 0 0 0 - 10 12 
1 0 0 0 0 - 10 23 1 0 0 0 0 - 11 13 1 0 0 0 0 - 11 24 1 0 0 0 0 - 12 13 2 0 0 0 0 - 12 25 1 0 0 0 0 - 13 26 1 0 0 0 0 - 14 17 1 0 0 0 0 - 15 18 2 0 0 0 0 - 17 19 2 0 0 0 0 - 17 27 1 0 0 0 0 - 18 19 1 0 0 0 0 - 18 28 1 0 0 0 0 - 19 29 1 0 0 0 0 -M END -> <PUBCHEM_COMPOUND_CID> -3033 - -> <PUBCHEM_CONFORMER_RMSD> -0.6 - -> <PUBCHEM_CONFORMER_DIVERSEORDER> -1 -20 -18 -39 -29 -42 -38 -35 -30 -25 -33 -28 -32 -36 -26 -24 -40 -11 -27 -37 -7 -41 -10 -19 -43 -8 -6 -16 -44 -23 -34 -14 -15 -31 -9 -13 -17 -21 -22 -5 -12 -2 -3 -4 - -> <PUBCHEM_MMFF94_PARTIAL_CHARGES> -28 -1 -0.18 -10 -0.15 -11 -0.15 -12 -0.15 -13 -0.15 -14 0.18 -15 0.18 -16 0.66 -17 -0.15 -18 -0.15 -19 -0.15 -2 -0.18 -22 0.4 -23 0.15 -24 0.15 -25 0.15 -26 0.15 -27 0.15 -28 0.15 -29 0.15 -3 -0.65 -30 0.5 -4 -0.57 -5 -0.6 -6 -0.14 -7 0.1 -8 0.2 -9 0.1 - -> <PUBCHEM_EFFECTIVE_ROTOR_COUNT> -4 - -> <PUBCHEM_PHARMACOPHORE_FEATURES> -7 -1 3 acceptor -1 4 acceptor -1 5 cation -1 5 donor -3 3 4 16 anion -6 6 7 10 11 12 13 rings -6 9 14 15 17 18 19 rings - -> <PUBCHEM_HEAVY_ATOM_COUNT> -19 - -> <PUBCHEM_ATOM_DEF_STEREO_COUNT> -0 - -> <PUBCHEM_ATOM_UDEF_STEREO_COUNT> -0 - -> <PUBCHEM_BOND_DEF_STEREO_COUNT> -0 - -> <PUBCHEM_BOND_UDEF_STEREO_COUNT> -0 - -> <PUBCHEM_ISOTOPIC_ATOM_COUNT> -0 - -> <PUBCHEM_COMPONENT_COUNT> -1 - -> <PUBCHEM_CACTVS_TAUTO_COUNT> -1 - -> <PUBCHEM_CONFORMER_ID> -00000BD900000001 - -> <PUBCHEM_MMFF94_ENERGY> -65.6362 - -> <PUBCHEM_FEATURE_SELFOVERLAP> -35.578 - -> <PUBCHEM_SHAPE_FINGERPRINT> -10366900 7 17386020514759110480 -114674 6 16903282898360328323 -11578080 2 17913245089295617604 -11582403 64 14544541357940910356 -11640471 11 18127963303313961600 -12236239 1 18272088352834916308 -12363563 72 18042978579496277287 -12553582 1 18190740839094073615 -12596599 1 18201439237582433270 -12788726 201 18410285909464206003 -13032168 30 18201440238019390274 -13140716 1 18187086113919468457 -13538477 17 18339642338307470464 -13583140 156 17241914119188522922 -13764800 53 17895191172601517065 -13965767 371 
17259888045752176376 -14115302 16 18342181093776810149 -14787075 74 17907866106787333628 -15279307 12 18198622322777022915 -15375462 189 18270674264943931347 -15669948 3 18336550511731321249 -16752209 62 18336841852664817743 -16945 1 18188484791351783177 -19433438 48 18059583550169763352 -200 152 18130792217719576158 -20645476 183 18270115859187436189 -20905425 154 17970632883131290416 -21452121 199 18046637711133085653 -21639500 275 16988270998321974524 -22112679 90 18342446063036096292 -23419403 2 17835564502519425292 -23493267 7 18115023138028600728 -23526113 38 16660924516543134566 -23557571 272 17821721762863303772 -23559900 14 17896315990920094510 -23598288 3 18411412925846384519 -23598291 2 18059009613384180254 -238 59 16343141308025475526 -4340502 62 17273677940604857177 -6049 1 17240202131864233360 -6992083 37 18058168521433072460 -7615 1 18201433675414973908 -77492 1 18272651289913926852 -81228 2 17968373550240022809 -9709674 26 17896035610527288590 - -> <PUBCHEM_SHAPE_MULTIPOLES> -378.03 -7.01 -2.75 -1.77 -0.78 -1.58 -0.3 -0.41 -1.94 --1.08 -1.9 --8.69 -11.04 -2.58 - -> <PUBCHEM_SHAPE_SELFOVERLAP> -790.335 - -> <PUBCHEM_SHAPE_VOLUME> -214.7 - -> <PUBCHEM_COORDINATE_TYPE> -2 -5 -255 - -$$$$ -
--- a/test-data/confab_on_CID3033.sdf Sun Nov 01 10:29:42 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,270 +0,0 @@ -214.7 - OpenBabel06291213403D - - 30 31 0 0 0 0 0 0 0 0999 V2000 - 1.9541 1.1500 -2.5078 Cl 0 0 0 0 0 0 0 0 0 0 0 0 - 1.1377 -1.6392 2.1136 Cl 0 0 0 0 0 0 0 0 0 0 0 0 - -3.2620 -2.9284 -1.0647 O 0 0 0 0 0 0 0 0 0 0 0 0 - -2.7906 -1.9108 0.9092 O 0 0 0 0 0 0 0 0 0 0 0 0 - 0.2679 -0.2051 -0.3990 N 0 0 0 0 0 0 0 0 0 0 0 0 - -2.0640 0.5139 -0.3769 C 0 0 0 0 0 0 0 0 0 0 0 0 - -0.7313 0.7178 -0.0192 C 0 0 0 0 0 0 0 0 0 0 0 0 - -2.4761 -0.6830 -1.1703 C 0 0 0 0 0 0 0 0 0 0 0 0 - 1.6571 -0.2482 -0.1795 C 0 0 0 0 0 0 0 0 0 0 0 0 - -3.0382 1.4350 0.0081 C 0 0 0 0 0 0 0 0 0 0 0 0 - -0.3728 1.8429 0.7234 C 0 0 0 0 0 0 0 0 0 0 0 0 - -2.6797 2.5600 0.7506 C 0 0 0 0 0 0 0 0 0 0 0 0 - -1.3470 2.7640 1.1083 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.5353 0.3477 -1.0918 C 0 0 0 0 0 0 0 0 0 0 0 0 - 2.1740 -0.8865 0.9534 C 0 0 0 0 0 0 0 0 0 0 0 0 - -2.8480 -1.8749 -0.3123 C 0 0 0 0 0 0 0 0 0 0 0 0 - 3.9124 0.3058 -0.8739 C 0 0 0 0 0 0 0 0 0 0 0 0 - 3.5511 -0.9285 1.1713 C 0 0 0 0 0 0 0 0 0 0 0 0 - 4.4203 -0.3324 0.2576 C 0 0 0 0 0 0 0 0 0 0 0 0 - -1.7086 -0.9792 -1.8930 H 0 0 0 0 0 0 0 0 0 0 0 0 - -3.3614 -0.4266 -1.7676 H 0 0 0 0 0 0 0 0 0 0 0 0 - -0.0861 -1.1146 -0.6780 H 0 0 0 0 0 0 0 0 0 0 0 0 - -4.0812 1.2885 -0.2604 H 0 0 0 0 0 0 0 0 0 0 0 0 - 0.6569 2.0278 1.0167 H 0 0 0 0 0 0 0 0 0 0 0 0 - -3.4382 3.2769 1.0511 H 0 0 0 0 0 0 0 0 0 0 0 0 - -1.0683 3.6399 1.6868 H 0 0 0 0 0 0 0 0 0 0 0 0 - 4.6037 0.7654 -1.5758 H 0 0 0 0 0 0 0 0 0 0 0 0 - 3.9635 -1.4215 2.0480 H 0 0 0 0 0 0 0 0 0 0 0 0 - 5.4925 -0.3651 0.4274 H 0 0 0 0 0 0 0 0 0 0 0 0 - -3.5025 -3.7011 -0.5102 H 0 0 0 0 0 0 0 0 0 0 0 0 - 1 14 1 0 0 0 0 - 2 15 1 0 0 0 0 - 3 16 1 0 0 0 0 - 3 30 1 0 0 0 0 - 4 16 2 0 0 0 0 - 5 7 1 0 0 0 0 - 5 9 1 0 0 0 0 - 5 22 1 0 0 0 0 - 6 7 1 0 0 0 0 - 6 8 1 0 0 0 0 - 6 10 2 0 0 0 0 - 7 11 2 0 0 0 0 - 8 16 1 0 0 0 0 - 8 20 1 0 0 0 0 - 8 21 1 0 0 0 0 - 9 14 2 0 0 0 0 - 9 15 1 0 0 
0 0 - 10 12 1 0 0 0 0 - 10 23 1 0 0 0 0 - 11 13 1 0 0 0 0 - 11 24 1 0 0 0 0 - 12 13 2 0 0 0 0 - 12 25 1 0 0 0 0 - 13 26 1 0 0 0 0 - 14 17 1 0 0 0 0 - 15 18 2 0 0 0 0 - 17 19 2 0 0 0 0 - 17 27 1 0 0 0 0 - 18 19 1 0 0 0 0 - 18 28 1 0 0 0 0 - 19 29 1 0 0 0 0 -M END -> <PUBCHEM_COMPOUND_CID> -3033 - -> <PUBCHEM_CONFORMER_RMSD> -0.6 - -> <PUBCHEM_CONFORMER_DIVERSEORDER> -1 -20 -18 -39 -29 -42 -38 -35 -30 -25 -33 -28 -32 -36 -26 -24 -40 -11 -27 -37 -7 -41 -10 -19 -43 -8 -6 -16 -44 -23 -34 -14 -15 -31 -9 -13 -17 -21 -22 -5 -12 -2 -3 -4 - -> <PUBCHEM_MMFF94_PARTIAL_CHARGES> -28 -1 -0.18 -10 -0.15 -11 -0.15 -12 -0.15 -13 -0.15 -14 0.18 -15 0.18 -16 0.66 -17 -0.15 -18 -0.15 -19 -0.15 -2 -0.18 -22 0.4 -23 0.15 -24 0.15 -25 0.15 -26 0.15 -27 0.15 -28 0.15 -29 0.15 -3 -0.65 -30 0.5 -4 -0.57 -5 -0.6 -6 -0.14 -7 0.1 -8 0.2 -9 0.1 - -> <PUBCHEM_EFFECTIVE_ROTOR_COUNT> -4 - -> <PUBCHEM_PHARMACOPHORE_FEATURES> -7 -1 3 acceptor -1 4 acceptor -1 5 cation -1 5 donor -3 3 4 16 anion -6 6 7 10 11 12 13 rings -6 9 14 15 17 18 19 rings - -> <PUBCHEM_HEAVY_ATOM_COUNT> -19 - -> <PUBCHEM_ATOM_DEF_STEREO_COUNT> -0 - -> <PUBCHEM_ATOM_UDEF_STEREO_COUNT> -0 - -> <PUBCHEM_BOND_DEF_STEREO_COUNT> -0 - -> <PUBCHEM_BOND_UDEF_STEREO_COUNT> -0 - -> <PUBCHEM_ISOTOPIC_ATOM_COUNT> -0 - -> <PUBCHEM_COMPONENT_COUNT> -1 - -> <PUBCHEM_CACTVS_TAUTO_COUNT> -1 - -> <PUBCHEM_CONFORMER_ID> -00000BD900000001 - -> <PUBCHEM_MMFF94_ENERGY> -65.6362 - -> <PUBCHEM_FEATURE_SELFOVERLAP> -35.578 - -> <PUBCHEM_SHAPE_FINGERPRINT> -10366900 7 17386020514759110480 -114674 6 16903282898360328323 -11578080 2 17913245089295617604 -11582403 64 14544541357940910356 -11640471 11 18127963303313961600 -12236239 1 18272088352834916308 -12363563 72 18042978579496277287 -12553582 1 18190740839094073615 -12596599 1 18201439237582433270 -12788726 201 18410285909464206003 -13032168 30 18201440238019390274 -13140716 1 18187086113919468457 -13538477 17 18339642338307470464 -13583140 156 17241914119188522922 -13764800 53 17895191172601517065 
-13965767 371 17259888045752176376 -14115302 16 18342181093776810149 -14787075 74 17907866106787333628 -15279307 12 18198622322777022915 -15375462 189 18270674264943931347 -15669948 3 18336550511731321249 -16752209 62 18336841852664817743 -16945 1 18188484791351783177 -19433438 48 18059583550169763352 -200 152 18130792217719576158 -20645476 183 18270115859187436189 -20905425 154 17970632883131290416 -21452121 199 18046637711133085653 -21639500 275 16988270998321974524 -22112679 90 18342446063036096292 -23419403 2 17835564502519425292 -23493267 7 18115023138028600728 -23526113 38 16660924516543134566 -23557571 272 17821721762863303772 -23559900 14 17896315990920094510 -23598288 3 18411412925846384519 -23598291 2 18059009613384180254 -238 59 16343141308025475526 -4340502 62 17273677940604857177 -6049 1 17240202131864233360 -6992083 37 18058168521433072460 -7615 1 18201433675414973908 -77492 1 18272651289913926852 -81228 2 17968373550240022809 -9709674 26 17896035610527288590 - -> <PUBCHEM_SHAPE_MULTIPOLES> -378.03 -7.01 -2.75 -1.77 -0.78 -1.58 -0.3 -0.41 -1.94 --1.08 -1.9 --8.69 -11.04 -2.58 - -> <PUBCHEM_SHAPE_SELFOVERLAP> -790.335 - -> <PUBCHEM_SHAPE_VOLUME> -214.7 - -> <PUBCHEM_COORDINATE_TYPE> -2 -5 -255 - -$$$$
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/2008001635_153_chem.smi Sun Nov 01 10:31:25 2015 -0500 @@ -0,0 +1,1 @@ +CCC(c1ccc(cc1)Br)OCCCO
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test_data/CID_2244.sdf Sun Nov 01 10:31:25 2015 -0500 @@ -0,0 +1,155 @@ +2244 + -OEChem-05151212332D + + 21 21 0 0 0 0 0 0 0999 V2000 + 3.7320 -0.0600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 1.4400 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -1.5600 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.5981 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 -1.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 0.9400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.8660 -0.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0000 -0.0600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.0611 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 5.4641 -2.6800 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8671 -1.8700 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.3100 0.4769 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.4631 0.2500 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1.6900 -0.5969 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3301 2.0600 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 5 1 0 0 0 0 + 1 12 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 21 1 0 0 0 0 + 3 11 2 0 0 0 0 + 4 12 2 0 0 0 0 + 5 6 1 0 0 0 0 + 5 7 2 0 0 0 0 + 6 8 2 0 0 0 0 + 6 11 1 0 0 0 0 + 7 9 1 0 0 0 0 + 7 14 1 0 0 0 0 + 8 10 1 0 0 0 0 + 8 15 1 0 0 0 0 + 9 10 2 0 0 0 0 + 9 16 1 0 0 0 0 + 10 17 1 0 0 0 0 + 12 13 1 0 0 0 0 + 13 18 1 0 0 0 0 + 13 19 1 0 0 0 0 + 13 20 1 0 0 0 0 +M END +> <PUBCHEM_COMPOUND_CID> +2244 + +> <PUBCHEM_COMPOUND_CANONICALIZED> +1 + +> <PUBCHEM_CACTVS_COMPLEXITY> +212 + +> <PUBCHEM_CACTVS_HBOND_ACCEPTOR> +4 + +> <PUBCHEM_CACTVS_HBOND_DONOR> +1 + +> <PUBCHEM_CACTVS_ROTATABLE_BOND> +3 + +> <PUBCHEM_CACTVS_SUBSKEYS> 
+AAADccBwOAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGgAACAAADASAmAAyDoAABgCIAiDSCAACCAAkIAAIiAEGCMgMJzaENRqCe2Cl4BEIuYeIyCCOAAAAAAAIAAAAAAAAABAAAAAAAAAAAA== + +> <PUBCHEM_IUPAC_OPENEYE_NAME> +2-acetoxybenzoic acid + +> <PUBCHEM_IUPAC_CAS_NAME> +2-acetyloxybenzoic acid + +> <PUBCHEM_IUPAC_NAME> +2-acetyloxybenzoic acid + +> <PUBCHEM_IUPAC_SYSTEMATIC_NAME> +2-acetyloxybenzoic acid + +> <PUBCHEM_IUPAC_TRADITIONAL_NAME> +2-acetoxybenzoic acid + +> <PUBCHEM_IUPAC_INCHI> +InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12) + +> <PUBCHEM_IUPAC_INCHIKEY> +BSYNRYMUTXBXSQ-UHFFFAOYSA-N + +> <PUBCHEM_XLOGP3> +1.2 + +> <PUBCHEM_EXACT_MASS> +180.042259 + +> <PUBCHEM_MOLECULAR_FORMULA> +C9H8O4 + +> <PUBCHEM_MOLECULAR_WEIGHT> +180.15742 + +> <PUBCHEM_OPENEYE_CAN_SMILES> +CC(=O)OC1=CC=CC=C1C(=O)O + +> <PUBCHEM_OPENEYE_ISO_SMILES> +CC(=O)OC1=CC=CC=C1C(=O)O + +> <PUBCHEM_CACTVS_TPSA> +63.6 + +> <PUBCHEM_MONOISOTOPIC_WEIGHT> +180.042259 + +> <PUBCHEM_TOTAL_CHARGE> +0 + +> <PUBCHEM_HEAVY_ATOM_COUNT> +13 + +> <PUBCHEM_ATOM_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_ATOM_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_DEF_STEREO_COUNT> +0 + +> <PUBCHEM_BOND_UDEF_STEREO_COUNT> +0 + +> <PUBCHEM_ISOTOPIC_ATOM_COUNT> +0 + +> <PUBCHEM_COMPONENT_COUNT> +1 + +> <PUBCHEM_CACTVS_TAUTO_COUNT> +1 + +> <PUBCHEM_COORDINATE_TYPE> +1 +5 +255 + +> <PUBCHEM_BONDANNOTATIONS> +5 6 8 +5 7 8 +6 8 8 +7 9 8 +8 10 8 +9 10 8 + +$$$$ +
--- a/tool_dependencies.xml Sun Nov 01 10:29:42 2015 -0500 +++ b/tool_dependencies.xml Sun Nov 01 10:31:25 2015 -0500 @@ -1,30 +1,74 @@ <?xml version="1.0"?> <tool_dependency> - <package name="eigen2" version="2.0.17"> - <repository changeset_revision="08253ca817b7" name="package_eigen_2_0" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <package name="openbabel" version="2.3.2"> + <repository changeset_revision="ac9bf7ac1281" name="package_openbabel_2_3" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> - <package name="confab" version="1.0.1"> + <package name="graphicsmagick" version="1.3.18"> + <repository changeset_revision="bff3f66adff2" name="package_graphicsmagick_1_3" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + </package> + <package name="osra" version="2.0.0"> <install version="1.0"> <actions> - <action type="download_by_url">https://github.com/bgruening/confab/archive/1.0.1.tar.gz</action> + <!-- first action is always downloading --> + <action type="download_by_url">http://downloads.sourceforge.net/project/osra/osra/2.0.0/osra-2.0.0.tgz</action> <!-- populate the environment variables from the dependend repos --> <action type="set_environment_for_install"> - <repository changeset_revision="08253ca817b7" name="package_eigen_2_0" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu"> - <package name="eigen2" version="2.0.17" /> + <repository changeset_revision="ac9bf7ac1281" name="package_openbabel_2_3" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu"> + <package name="openbabel" version="2.3.2" /> + </repository> + <repository changeset_revision="bff3f66adff2" name="package_graphicsmagick_1_3" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu"> + <package name="graphicsmagick" version="1.3.18" /> </repository> </action> - <action type="shell_command">cmake . 
-DCMAKE_INSTALL_PREFIX=$INSTALL_DIR -DEIGEN2_INCLUDE_DIR=$EIGEN2_SOURCE_PATH</action> + <!--compiling potrace-1.11 --> + <action type="download_file">http://potrace.sourceforge.net/download/1.11/potrace-1.11.tar.gz</action> + <action type="shell_command">tar xfvz potrace-1.11.tar.gz && cd potrace-1.11 && ./configure --with-libpotrace --prefix=$INSTALL_DIR/potrace/build && make && make install</action> + + <!--compiling gocr 0.50patched --> + <!-- not yet possible + <action type="download_file">http://www-e.uni-magdeburg.de/jschulen/ocr/gocr-0.50.tar.gz</action> + <action type="shell_command">tar xfvz gocr-0.50.tar.gz && cd gocr-0.50 && ./configure -|-prefix=$INSTALL_DIR/gocr/build && make libs && make all install</action> + --> + <action type="download_file">http://downloads.sourceforge.net/project/osra/gocr-patched/gocr-0.50pre-patched.tgz</action> + <action type="shell_command">tar xfvz gocr-0.50pre-patched.tgz && cd gocr-0.50pre-patched && ./configure --prefix=$INSTALL_DIR/gocr/build && make libs && make all install</action> + + + <!--compiling tclap 1.2.1 --> + <action type="download_file">http://downloads.sourceforge.net/project/tclap/tclap-1.2.1.tar.gz</action> + <action type="shell_command">tar xfvz tclap-1.2.1.tar.gz && cd tclap-1.2.1 && ./configure --prefix=$INSTALL_DIR/tclap/build && make && make install</action> + + <!--compiling ocrad 0.21 --> + <action type="download_file">http://mirror.checkdomain.de/gnu/ocrad/ocrad-0.21.tar.gz</action> + <action type="shell_command">tar xfvz ocrad-0.21.tar.gz && cd ocrad-0.21 && ./configure --prefix=$INSTALL_DIR/ocrad/build && make && make install</action> + + <!--compiling cuneiform 1.1.0 --> + <action type="download_file">https://launchpad.net/cuneiform-linux/1.1/1.1/+download/cuneiform-linux-1.1.0.tar.bz2</action> + <action type="shell_command">tar xfvj cuneiform-linux-1.1.0.tar.bz2 && cd cuneiform-linux-1.1.0 && mkdir build && cd build && cmake .. 
-DCMAKE_INSTALL_PREFIX=$INSTALL_DIR/cuneiform/build/ && make && make install</action> + + <!--And finally OSRA 1.4.0--> + <!-- Extending the PATH is needed for OSRA --> + <action type="shell_command">export PATH=$PATH:$GRAPHICSMAGICK_ROOT_DIR/bin/ && + ./configure --with-tclap-include=$INSTALL_DIR/tclap/build/include/ --with-potrace-include=$INSTALL_DIR/potrace/build/include/ --with-potrace-lib=$INSTALL_DIR/potrace/build/lib/ --with-gocr-include=$INSTALL_DIR/gocr/build/include/gocr/ --with-gocr-lib=$INSTALL_DIR/gocr/build/lib/ --with-ocrad-include=$INSTALL_DIR/ocrad/build/include/ --with-ocrad-lib=$INSTALL_DIR/ocrad/build/lib/ --with-cuneiform-include=$INSTALL_DIR/cuneiform/build/include/ --with-cuneiform --with-cuneiform-lib=$INSTALL_DIR/cuneiform/build/install/lib/ --with-openbabel-include=$OPENBABEL_INCLUDE_DIR/openbabel-2.0/ --with-openbabel-lib=$OPENBABEL_LIB_DIR --with-graphicsmagick-lib=$GRAPHICSMAGICK_ROOT_DIR/lib/ --with-graphicsmagick-include=$GRAPHICSMAGICK_ROOT_DIR/include/GraphicsMagick/ --prefix=$INSTALL_DIR </action> <action type="shell_command">make</action> <action type="shell_command">make install</action> + + <action type="shell_command">rm $INSTALL_DIR/tclap/ -r</action> + <!--<action type="shell_command">rm $INSTALL_DIR/openbabel/ -r</action>--> + <action type="shell_command">rm $INSTALL_DIR/gocr/ -r</action> + <action type="shell_command">rm $INSTALL_DIR/ocrad/ -r</action> + <action type="shell_command">rm $INSTALL_DIR/cuneiform/ -r</action> + <action type="set_environment"> + <environment_variable action="prepend_to" name="LD_LIBRARY_PATH">$ENV[GRAPHICSMAGICK_ROOT_DIR]/lib/</environment_variable> + <environment_variable action="prepend_to" name="LD_LIBRARY_PATH">$INSTALL_DIR/potrace/build/lib/</environment_variable> <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> - <environment_variable action="set_to" name="BABEL_DATADIR">$INSTALL_DIR/share/openbabel</environment_variable> - <environment_variable 
action="set_to" name="BABEL_LIBDIR">$INSTALL_DIR/lib/openbabel/2.2.99</environment_variable> + <!-- OSRA_DATA_FILES is only used by the galaxy wrapper and is not part of OSRA --> + <environment_variable action="set_to" name="OSRA_DATA_FILES">$INSTALL_DIR/share</environment_variable> </action> </actions> </install> - <readme>Compiling Confab requires g++, CMake 2.4+. Optional but required for a few features is libxml2 and zlib.</readme> + <readme>We still have a handful of requirements</readme> </package> </tool_dependency>