view filter/ob_filter.xml @ 10:cc63a42fd22e

Uploaded
author bgruening
date Sat, 25 May 2013 12:54:40 -0400
parents 12822efbd4a4
children 3d3e7c1d0f49
line wrap: on
line source

<tool id="ctb_filter" name="Filter" version="1.0">
  <description> a set of molecules from a file</description>
  <!--
    Parallel execution settings: the "infile" dataset is split
    (split_mode "to_size", split_size 10000), no inputs are shared between
    parts, and the per-part "outfile" results are merged.
    NOTE(review): the unit of split_size (presumably molecules/records per
    part) depends on the datatype's split implementation; confirm.
  -->
  <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism>
  <!-- Packages the tool declares as runtime requirements: openbabel 2.3.2 and cheminfolib 0.1. -->
  <requirements>
    <requirement type="package" version="2.3.2">openbabel</requirement>
    <requirement type="package" version="0.1">cheminfolib</requirement>
  </requirements>
  <!--
    Command template: runs ob_filter.py with the python interpreter on the
    selected dataset. Both the input and the output format options are filled
    from the extension of "infile", so the result keeps the input's format.

    The filters option receives a single-quoted, brace-delimited mapping of
    property name to [min, max]:
      * for the presets (ruleof5, LeadLike, DrugLike, FragmentLike) the
        bounds are hard-coded in the template below;
      * otherwise one entry is emitted per "filter_set" repeat; the bounds
        are read from the parameters named <property>_min and <property>_max,
        whose names are built from the selected option value.

    NOTE(review): every entry ends with a comma, so the emitted text always
    has a trailing comma before the closing brace. A strict JSON parser would
    reject that; presumably ob_filter.py tolerates or strips it. Confirm
    before changing the template or the script.
    NOTE(review): choosing the same property in several repeats emits the
    same key more than once; how ob_filter.py resolves duplicate keys is not
    visible from this file.
  -->
  <command interpreter="python">
    ob_filter.py
      -i "${infile}"
      -o "${outfile}"
      -iformat "${infile.ext}"
      -oformat "${infile.ext}"
      --filters '{
        #if $filter_methods.filter_methods_opts == "ruleof5":
          "hbd" : [0, 5],
          "hba" : [0, 10],
          "molwt" : [0, 500],
          "logp" : [-5, 5],
        #elif $filter_methods.filter_methods_opts == "LeadLike":
          "rotbonds" : [0, 7],
          "molwt" : [0, 350],
          "logp" : [-5, 3.5],
        #elif $filter_methods.filter_methods_opts == "DrugLike":
          "hba" : [0, 10],
          "rotbonds" : [0, 8],
          "molwt" : [150, 500],
          "logp" : [-5, 5],
          "psa" : [0, 150],
        #elif $filter_methods.filter_methods_opts == "FragmentLike":
          "rotbonds" : [0, 5],
          "molwt" : [0, 250],
          "logp" : [-5, 2.5],
        #else:
          #for $filter in $filter_methods.filter_set:
            #set $filter_selected = $filter.filter_sel.filter_sel_opts
            #set $filter_min = $filter_selected + "_min"
            #set $filter_max = $filter_selected + "_max"
            "$filter_selected" : [$filter.filter_sel[$filter_min], $filter.filter_sel[$filter_max] ],
          #end for
        #end if
      }'
  </command>
  <!--
    Tool form.
      * "infile": the molecule dataset to filter (sdf, smi, mol, mol2, cml or
        inchi).
      * "filter_methods": chooses one of four pre-defined filter sets, which
        need no further input, or "user", which exposes a repeatable list of
        single-property filters.
    Inside each repeat the bound parameters are named <option value>_min and
    <option value>_max. The command template builds exactly these names from
    the selected option value, so option values and parameter names must be
    kept in sync.
  -->
  <inputs>
    <param name="infile" type="data" format="sdf,smi,mol,mol2,cml,inchi" label="Select input file previously created with the 'Compute physico-chemical properties' tool"/>
    <conditional name="filter_methods">
      <param name="filter_methods_opts" type="select" label="Select a pre-defined filtering set">
        <option value="user">User-defined properties</option>
        <option value="ruleof5">Lipinski's Rule-of-Five</option>
        <option value="LeadLike">Lead-like properties</option>
        <option value="DrugLike">Drug-like properties</option>
        <option value="FragmentLike">Fragment-like properties</option>
      </param>
      <!-- The presets take no extra parameters; their bounds are fixed in the command template. -->
      <when value="ruleof5" />
      <when value="LeadLike" />
      <when value="DrugLike" />
      <when value="FragmentLike" />
      <!-- User-defined mode: any number of (property, minimum, maximum) filters. -->
      <when value="user">
        <repeat name="filter_set" title="Filters selection">
          <conditional name="filter_sel">
            <param name="filter_sel_opts" type="select" label="Select properties to filter">
              <option value="hbd">Number of Hydrogen-bond donor groups</option>
              <option value="hba">Number of Hydrogen-bond acceptor groups</option>
              <option value="psa">Total polar Surface Area</option>
              <option value="rotbonds">Number of rotatable bonds</option>
              <option value="molwt">Molecular weight</option>
              <option value="logp">Predicted value of LogP</option>
              <option value="mr">Predicted value for the Molecular Refractivity</option>
              <option value="atoms">Number of atoms</option>
              <!-- To be added later: this requires a new SMARTS pattern in plugindefines.txt
              <option value="hatoms">Number of heavy atoms</option>-->
              <!-- This option is skipped; for further information please read: http://www.dalkescientific.com/writings/diary/archive/2011/06/04/dealing_with_sssr.html -->
              <!--<option value="rings">Number of rings</option>-->
            </param>
            <!-- One branch per property; each declares the <value>_min / <value>_max pair read by the command template. -->
            <when value="hbd">
              <param name="hbd_min" size="3" type="integer" value="" label="Minimum number of HB donors"/>
              <param name="hbd_max" size="3" type="integer" value="" label="Maximum number of HB donors"/>
            </when>
            <when value="hba">
              <param name="hba_min" size="3" type="integer" value="" label="Minimum number of HB acceptors"/>
              <param name="hba_max" size="3" type="integer" value="" label="Maximum number of HB acceptors"/>
            </when>
            <!-- NOTE(review): psa and molwt bounds are declared as integers, so fractional thresholds cannot be entered; confirm this is intended. -->
            <when value="psa">
              <param name="psa_min" size="5" type="integer" value="" label="Minimum threshold for the Total Polar Surface Area"/>
              <param name="psa_max" size="5" type="integer" value="" label="Maximum threshold for the Total Polar Surface Area"/>
            </when>
            <when value="rotbonds">
              <param name="rotbonds_min" size="3" type="integer" value="" label="Minimum number of rotatable bonds"/>
              <param name="rotbonds_max" size="3" type="integer" value="" label="Maximum number of rotatable bonds"/>
            </when>
            <when value="molwt">
              <param name="molwt_min" size="5" type="integer" value="" label="Minimum threshold value for the Molecular Weight"/>
              <param name="molwt_max" size="5" type="integer" value="" label="Maximum threshold value for the Molecular Weight"/>
            </when>
            <when value="logp">
              <param name="logp_min" size="4" type="float" value="" label="Minimum threshold value for the log-P"/>
              <param name="logp_max" size="4" type="float" value="" label="Maximum threshold value for the log-P"/>
            </when>
            <when value="mr">
              <param name="mr_min" size="4" type="float" value="" label="Minimum threshold value for the Molecular Refractivity"/>
              <param name="mr_max" size="4" type="float" value="" label="Maximum threshold value for the Molecular Refractivity"/>
            </when>
            <when value="atoms">
              <param name="atoms_min" size="3" type="integer" value="" label="Minimum number of atoms"/>
              <param name="atoms_max" size="3" type="integer" value="" label="Maximum number of atoms"/>
            </when>
            <!-- Disabled together with the "rings" option above.
            <when value="rings">
              <param name="rings_min" size="3" type="integer" value="" label="Minimum number of rings"/>
              <param name="rings_max" size="3" type="integer" value="" label="Maximum number of rings"/>
            </when>-->
          </conditional>
        </repeat>
      </when>
    </conditional>
  </inputs>
  <!-- Single output dataset "outfile"; its datatype is taken from "infile" via format_source. -->
  <outputs>
        <data format_source="infile" name="outfile" />
  </outputs>
  <tests>
    <!-- Applies the Lipinski Rule-of-Five preset to CID_2244.smi and compares the result with ob_filter_on_CID2244.smi. -->
    <test>
        <param name="infile" ftype="smi" value="CID_2244.smi"/>
        <param name="filter_methods_opts" value="ruleof5" />
        <output name="outfile" ftype="smi" file="ob_filter_on_CID2244.smi" />
    </test>
    <!--
    Disabled test for the user-defined mode.
    Limitation of the test framework (disambiguated conditional parameters are not supported in unit tests):
    https://trello.com/card/disambiguated-conditional-parameters-not-supported-in-unit-tests/506338ce32ae458f6d15e4b3/820
    The bounds use hba_min/hba_max so that they match the selected "hba" property
    (they were previously hbd_min/hbd_max, which belong to the "hbd" option).
    <test>
        <param name="infile" ftype="smi" value="CID_2244.smi"/>
        <param name="filter_methods_opts" value="user" />
        <param name="filter_sel_opts" value="hba" />
        <param name="hba_min" value="0" />
        <param name="hba_max" value="5" />
        <output name="outfile" ftype="smi" file="ob_filter_on_CID2244_2.smi" />
    </test>
    -->
  </tests>
  <help>


**What does this tool do?**

  This tool filters a library of compounds provided by the user, based on a set of physico-chemical parameters. The parameters can either be user-defined or taken from one of the pre-defined filtering rules described below. Multiple parameters can be selected for more specific queries.

-----

.. class:: infomark

**Tip:** If your input file is in SDF format you can use the 'Compute physico-chemical properties' tool to precalculate the properties and run the filter on that precomputed dataset. This should be faster and the dataset can be reused, but it is larger than a SMILES file.

**Tip:** For exact matches, use the target value for both the minimum and maximum parameters (e.g. a selection of EXACTLY 4 rotatable bonds can be performed by entering 4 as both the minimum and the maximum value for the property).

**Tip:** Selecting the same property multiple times with different parameters will result in querying the largest overlapping subset of values for that property (e.g. a selection of between 0 and 3 rotatable bonds plus a selection of between 2 and 4 will result in a query for compounds with between 2 and 3 rotatable bonds).

-----

.. class:: infomark

**Definition of the pre-defined filtering rules:**
  **# Lipinski's Rule of Five:**
    =&lt; 5 Hydrogen-bond donor groups

    =&lt; 10 Hydrogen-bond acceptor groups

    =&lt; 500 Molecular weight

    =&lt; 5 octanol/water partition coefficient (log P)

  **# Lead Like properties** (Teague, Davis, Leeson, Oprea, Angew Chem Int Ed Engl. 1999 Dec 16;38(24):3743-3748):
    =&lt; 7 rotatable bonds

    =&lt; 350 Molecular weight

    =&lt; 3.5 octanol/water partition coefficient (log P)

  **# Drug Like properties** (Lipinski, J Pharmacol Toxicol Methods. 2000 Jul-Aug;44(1):235-49):
    =&lt; 10 Hydrogen-bond acceptor groups

    =&lt; 8 rotatable bonds

    150 =&lt; Molecular weight =&lt; 500

    =&lt; 150 Polar Surface Area

    =&lt; 5 octanol/water partition coefficient (log P)

  **# Fragment Like properties** (Carr RA, Congreve M, Murray CW, Rees DC, Drug Discov Today. 2005 Jul 15;10(14):987):
    =&lt; 5 rotatable bonds

    =&lt; 250 Molecular weight

    =&lt; 2.5 octanol/water partition coefficient (log P)

-----

.. class:: infomark

**Output:** the output is written in the same format as the input file. The output formats included in this implementation are:

    # sdf formatted coordinates of the molecules, with selected properties stored as meta-data for each compound.

    # SMILES, InChI or mol2 formatted files containing the 1D strings or 3D coordinates of each compound.

-----

**This tool uses Pybel:** N. M. O'Boyle, C. Morley and G. R. Hutchison. Pybel: a Python wrapper for the OpenBabel cheminformatics toolkit Chem. Cent. J. 2008, 2, 5.

  </help>
</tool>