Mercurial > repos > bgruening > osra

<tool id="ctb_osra" name="Molecule recognition" version="2.1.0">
    <!-- Short description that accompanies the tool name "Molecule recognition". -->
    <description>in PDF documents (OSRA)</description>
    <!--
        Packages the command section relies on, pinned to exact versions.
        osra is the executable invoked by the command; poppler is presumably
        what osra uses to read PDF input (TODO confirm).
    -->
    <requirements>
        <requirement type="package" version="2.1.0">osra</requirement>
        <requirement type="package" version="0.52">poppler</requirement>
    </requirements>
    <command detect_errors="aggressive">
<![CDATA[
## The spelling and superatom dictionaries are looked up in the "share" directory that sits
## next to the directory containing the osra executable (<prefix>/bin/osra, <prefix>/share).
## Resolve that directory once and keep it quoted so that an installation prefix containing
## whitespace cannot split the path. Shell dollar signs are backslash-escaped for Cheetah.
osra_share="\$(dirname "\$(command -v osra)")/../share" &&
osra
  -f '$oformat'
  '$infile'
  -l "\$osra_share/spelling.txt"
  -a "\$osra_share/superatom.txt"
  $confidence
  $adaptive
  $thinning
  > '$outfile'
]]>
    </command>
    <inputs>
        <!-- Document to scan; only datasets of format "pdf" are accepted. -->
        <param name="infile" type="data" format="pdf" label="PDF with molecules"/>

        <!-- Passed to osra via -f; the value "can" also switches the output datatype to smi. -->
        <param name="oformat" type="select" label="Output molecule format">
            <option value="can">SMILES</option>
            <option value="sdf">SDF</option>
        </param>

        <!--
            Boolean switches: each one expands to its osra flag on the command
            line when checked and to an empty string otherwise.
        -->
        <param name="confidence"
               type="boolean"
               label="Print out confidence estimate"
               help=" (-p)"
               truevalue="-p"
               falsevalue=""
               checked="true"/>
        <param name="adaptive"
               type="boolean"
               label="Adaptive thresholding pre-processing, useful for low light/low contrast images"
               help="(-i)"
               truevalue="-i"
               falsevalue=""
               checked="false"/>
        <param name="thinning"
               type="boolean"
               label="Additional thinning/scaling down of low quality documents"
               help="(-j)"
               truevalue="-j"
               falsevalue=""
               checked="false"/>
    </inputs>
    <outputs>
        <!--
            Receives osra's standard output (the command redirects into it).
            Declared as sdf by default; becomes smi when the "oformat"
            parameter is set to "can".
        -->
        <data name="outfile" format="sdf">
            <change_format>
                <when input="oformat" value="can" format="smi"/>
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- Active test: PDF input, SDF output, all boolean options left at their defaults. -->
        <test>
            <param name="infile" value="CID_2244.pdf" ftype="pdf"/>
            <param name="oformat" value="sdf"/>
            <output name="outfile" ftype="sdf" file="osra_on_CID2244.sdf"/>
        </test>
        <!--
            The test below is disabled. It supplies a PNG, whereas the "infile"
            parameter only accepts the pdf format; presumably that is why it is
            commented out (TODO confirm).
        -->
        <!-- <test>
            <param name="infile" value="CID_2244.png" ftype="png"/>
            <param name="oformat" value="sdf"/>
            <output name="outfile" ftype="sdf" file="osra_on_CID2244.sdf"/>
        </test> -->
        <!-- The test below is disabled as well; the reason is recorded inside the block. -->
        <!--test>
            PNGs producing a core-dump in osra
            <param name="infile" value="2008001635_153_chem.png"/>
            <param name="oformat" value="can"/>
            <output name="outfile" ftype="smi" file="2008001635_153_chem.smi"/>
        </test-->
    </tests>
    <help>
<![CDATA[

.. class:: infomark

**What this tool does**

OSRA_ (Optical Structure Recognition Application) is a utility designed to convert graphical representations of chemical structures, contained within a PDF file, into a SMILES or SDF representation.

Further reading: `Optical Structure Recognition Software To Recover Chemical Information: OSRA, An Open Source Solution`_.

.. _OSRA: http://cactus.nci.nih.gov/osra/

.. _`Optical Structure Recognition Software To Recover Chemical Information: OSRA, An Open Source Solution`: http://pubs.acs.org/doi/abs/10.1021/ci800067r
]]>
    </help>
    <citations>
        <!-- BibTeX record for the 2010 workshop paper on improvements in OSRA. -->
        <citation type="bibtex">
            @inproceedings{filippov2010improvements,
            title={Improvements in Optical Structure Recognition Application},
            author={Filippov, Igor V and Nicklaus, Marc C and Kinney, John},
            booktitle={Document Analysis Systems Workshop},
            year={2010}
            }
        </citation>
    </citations>
</tool>
author	bgruening
date	Tue, 07 May 2019 13:28:05 -0400
parents	6cafe4f5d181
children