Mercurial > repos > bgruening > openbabel_filter
view ob_filter.xml @ 12:2213bfc4567a draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit cda909c5e0b88fa3d12abe43fc72b8dd0729417a"
author | bgruening |
---|---|
date | Thu, 09 Apr 2020 13:55:49 +0000 |
parents | 9cc9947c84a1 |
children | 1c0e4c2ac9ce |
line wrap: on
line source
<tool id="openbabel_filter" name="Filter" version="@VERSION@.1">
    <!--
        Galaxy wrapper around ob_filter.py. It filters the molecules of the input file by
        a pre-defined property set, by user-defined property ranges, or by molecule name.
        The @VERSION@ token and the "requirements", "output_like_input" and "citations"
        macros are not defined in this file; they are expected to come from macros.xml.
    -->
    <description> a set of molecules from a file</description>
    <!--parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism-->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!--
        Command line layout:
          * input and output use the same format, the datatype extension of "infile";
          * "Filter by molecule name" passes the name file together with the marker
            string __filter_by_name__ as the filters value;
          * every other choice passes the filters as a dictionary literal of the form
            "property" : [min, max].
        Each dictionary entry is emitted with a trailing comma, so the generated text is
        not strict JSON. NOTE(review): presumably ob_filter.py tolerates the trailing
        comma; confirm before changing the template.
        Block directives are kept one per line.
    -->
    <command detect_errors="aggressive">
<![CDATA[
        python '$__tool_directory__/ob_filter.py'
            -i '${infile}'
            -o '${outfile}'
            -iformat '${infile.ext}'
            -oformat '${infile.ext}'
        #if $filter_methods.filter_methods_opts == "__filter_by_name__":
            --list_of_names '$name_file'
            --filters '__filter_by_name__'
        #else
            --filters '{
            #if $filter_methods.filter_methods_opts == "ruleof5":
                "hbd" : [0, 5],
                "hba" : [0, 10],
                "molwt" : [0, 500],
                "logp" : [-5, 5],
            #elif $filter_methods.filter_methods_opts == "LeadLike":
                "rotbonds" : [0, 7],
                "molwt" : [0, 350],
                "logp" : [-5, 3.5],
            #elif $filter_methods.filter_methods_opts == "DrugLike":
                "hba" : [0, 10],
                "rotbonds" : [0, 8],
                "molwt" : [150, 500],
                "logp" : [-5, 5],
                "psa" : [0, 150],
            #elif $filter_methods.filter_methods_opts == "FragmentLike":
                "rotbonds" : [0, 5],
                "molwt" : [0, 250],
                "logp" : [-5, 2.5],
            #else:
                #for $filter in $filter_methods.filter_set:
                    #if $filter.filter_sel.filter_sel_opts == "field":
                        #set $filter_selected = $filter.filter_sel['field_name']
                        "$filter_selected": [$filter.filter_sel['field_min'], $filter.filter_sel['field_max'] ],
                    #else:
                        #set $filter_selected = $filter.filter_sel.filter_sel_opts
                        #set $filter_min = $filter_selected + "_min"
                        #set $filter_max = $filter_selected + "_max"
                        "$filter_selected" : [$filter.filter_sel[$filter_min], $filter.filter_sel[$filter_max] ],
                    #end if
                #end for
            #end if
            }'
        #end if
]]>
    </command>
    <inputs>
        <param name="infile" type="data" format="sdf,smi,mol,mol2,cml,inchi" label="Select input file (can be previously annotated with the 'Compute physico-chemical properties' tool)"/>
        <!-- Top-level choice: one of the pre-defined rule sets, a name list, or custom ranges. -->
        <conditional name="filter_methods">
            <param name="filter_methods_opts" type="select" label="Select a pre-defined filtering set">
                <option value="user">User-defined properties</option>
                <option value="ruleof5">Lipinski's Rule-of-Five</option>
                <option value="LeadLike">Lead-like properties</option>
                <option value="DrugLike">Drug-like properties</option>
                <option value="FragmentLike">Fragment-like properties</option>
                <option value="__filter_by_name__">Filter by molecule name</option>
            </param>
            <when value="__filter_by_name__">
                <param name="name_file" type="data" format="txt,tabular" label="File with molecule names that match the names in the SDF file" help="One name per line."/>
            </when>
            <!-- The pre-defined sets need no further input; their ranges are hard-coded in the command template. -->
            <when value="ruleof5" />
            <when value="LeadLike" />
            <when value="DrugLike" />
            <when value="FragmentLike" />
            <when value="user">
                <repeat name="filter_set" title="Select custom filters">
                    <!--
                        The command template builds the bound names as the selected option
                        value followed by "_min" and "_max", so every "when" branch below must
                        name its two parameters accordingly. The "field" branch is handled
                        separately and reads field_name, field_min and field_max.
                    -->
                    <conditional name="filter_sel">
                        <param name="filter_sel_opts" type="select" label="Select properties to filter">
                            <option value="hbd">Number of hydrogen-bond donor groups</option>
                            <option value="hba">Number of hydrogen-bond acceptor groups</option>
                            <option value="psa">Total polar surface area</option>
                            <option value="rotbonds">Number of rotatable bonds</option>
                            <option value="molwt">Molecular weight</option>
                            <option value="logp">Predicted value of logP</option>
                            <option value="mr">Predicted value for the molecular refractivity</option>
                            <option value="atoms">Number of atoms</option>
                            <option value="field">Filter by a user-defined SDF property</option>
                            <!-- Add later, we need to add a new smarts pattern to plugindefines.txt
                            <option value="hatoms">Number of heavy atoms</option>-->
                            <!-- This option is skipped; for further information please read:
                            http://www.dalkescientific.com/writings/diary/archive/2011/06/04/dealing_with_sssr.html -->
                            <!--<option value="rings">Number of rings</option>-->
                        </param>
                        <when value="hbd">
                            <param name="hbd_min" type="integer" value="" label="Minimum number of hydrogen bond donors"/>
                            <param name="hbd_max" type="integer" value="" label="Maximum number of hydrogen bond donors"/>
                        </when>
                        <when value="hba">
                            <param name="hba_min" type="integer" value="" label="Minimum number of hydrogen bond acceptors"/>
                            <param name="hba_max" type="integer" value="" label="Maximum number of hydrogen bond acceptors"/>
                        </when>
                        <when value="psa">
                            <param name="psa_min" type="integer" value="" label="Minimum threshold for the total polar surface area"/>
                            <param name="psa_max" type="integer" value="" label="Maximum threshold for the total polar surface area"/>
                        </when>
                        <when value="rotbonds">
                            <param name="rotbonds_min" type="integer" value="" label="Minimum number of rotatable bonds"/>
                            <param name="rotbonds_max" type="integer" value="" label="Maximum number of rotatable bonds"/>
                        </when>
                        <when value="molwt">
                            <param name="molwt_min" type="integer" value="" label="Minimum threshold value for the molecular weight"/>
                            <param name="molwt_max" type="integer" value="" label="Maximum threshold value for the molecular weight"/>
                        </when>
                        <when value="logp">
                            <param name="logp_min" type="float" value="" label="Minimum threshold value for log P"/>
                            <param name="logp_max" type="float" value="" label="Maximum threshold value for log P"/>
                        </when>
                        <when value="mr">
                            <param name="mr_min" type="float" value="" label="Minimum threshold value for the molecular refractivity"/>
                            <param name="mr_max" type="float" value="" label="Maximum threshold value for the molecular refractivity"/>
                        </when>
                        <when value="atoms">
                            <param name="atoms_min" type="integer" value="" label="Minimum number of atoms"/>
                            <param name="atoms_max" type="integer" value="" label="Maximum number of atoms"/>
                        </when>
                        <when value="field">
                            <!-- The property name is placed inside the quoted filters argument, so it is
                                 restricted to letters, digits, underscore, dot and space. -->
                            <param name="field_name" type="text" value="" label="Name of the SDF property to filter by">
                                <sanitizer invalid_char="">
                                    <valid initial="string.ascii_letters,string.digits">
                                        <add value="_" />
                                        <add value="." />
                                        <add value=" " />
                                    </valid>
                                </sanitizer>
                            </param>
                            <param name="field_min" type="integer" value="" label="Minimum value"/>
                            <param name="field_max" type="integer" value="" label="Maximum value"/>
                        </when>
                        <!--<when value="rings">
                            <param name="rings_min" type="integer" value="" label="Minimum number of rings"/>
                            <param name="rings_max" type="integer" value="" label="Maximum number of rings"/>
                        </when>-->
                    </conditional>
                </repeat>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <expand macro="output_like_input"/>
    </outputs>
    <tests>
        <!-- Filter by molecule name. -->
        <test>
            <param name="infile" value="ligands_with_title.sdf" ftype="sdf" />
            <param name="filter_methods_opts" value="__filter_by_name__" />
            <param name="name_file" value="name_file.txt" />
            <output name="outfile" file="filterd_by_name.sdf" ftype="sdf" lines_diff="4" />
        </test>
        <!-- Pre-defined rule set on a SMILES input. -->
        <test>
            <param name="infile" ftype="smi" value="CID_2244.smi"/>
            <param name="filter_methods_opts" value="ruleof5" />
            <output name="outfile" ftype="smi" file="ob_filter_on_CID2244.smi" />
        </test>
        <!-- User-defined range on an SDF property. -->
        <test>
            <param name="infile" ftype="sdf" value="CID_2244.sdf"/>
            <param name="filter_methods_opts" value="user" />
            <param name="filter_sel_opts" value="field" />
            <param name="field_name" value="PUBCHEM_EXACT_MASS"/>
            <param name="field_min" value="100"/>
            <param name="field_max" value="200"/>
            <output name="outfile" ftype="sdf" file="ob_filter_on_CID2244.sdf" lines_diff="2"/>
        </test>
        <!--
        Limitation of the test framework:
        https://trello.com/card/disambiguated-conditional-parameters-not-supported-in-unit-tests/506338ce32ae458f6d15e4b3/820
        NOTE(review): the selector below is set to "hba" while the bounds are named
        hbd_min and hbd_max; align them before enabling this test.
        <test>
            <param name="infile" ftype="smi" value="CID_2244.smi"/>
            <param name="filter_methods_opts" value="user" />
            <param name="filter_sel_opts" value="hba" />
            <param name="hbd_min" value="0" />
            <param name="hbd_max" value="5" />
            <output name="outfile" ftype="smi" file="ob_filter_on_CID2244_2.smi" />
        </test>
        -->
    </tests>
    <help>
<![CDATA[

.. class:: infomark

**What this tool does**

Filters a library of compounds based on user-defined physico-chemical parameters or predefined options (e.g. Ro5, lead-like properties, etc.). Multiple parameters can be selected for more specific queries.

-----

.. class:: warningmark

**Hint**

| If your input file is in SDF format you can use the *Compute physico-chemical properties* tool to precalculate the properties and use the filter on that precomputed dataset. This should be faster and the file can be reused, although it is larger than a SMILES file.
|
| For exact matches use the target value for both minimum and maximum parameters (e.g. a selection of exactly 4 rotatable bonds can be performed by selecting 4 as minimum and maximum value).
|
| Selecting the same property multiple times with different parameters will result in querying the largest overlapping subset of values for the parameter (e.g. a selection of between 0 and 3 rotatable bonds plus a selection between 2 and 4 will result in a query for compounds between 2 and 3 rotatable bonds).

-----

.. class:: infomark

**Definition of the pre-defined filtering rules**

**# Lipinski's Rule of Five:**

| =< 5 Hydrogen-bond donor groups
| =< 10 Hydrogen-bond acceptor groups
| =< 500 Molecular weight
| =< 5 octanol/water partition coefficient (log P)

**# Lead Like properties** (Teague, Davis, Leeson, Oprea, Angew Chem Int Ed Engl. 1999 Dec 16;38(24):3743-3748):

| =< 7 rotatable bonds
| =< 350 Molecular weight
| =< 3.5 octanol/water partition coefficient (log P)

**# Drug Like properties** (Lipinski, J Pharmacol Toxicol Methods. 2000 Jul-Aug;44(1):235-49):

| =< 10 Hydrogen-bond acceptor groups
| =< 8 rotatable bonds
| 150 =< Molecular weight =< 500
| =< 150 Polar Surface Area
| =< 5 octanol/water partition coefficient (log P)

**# Fragment Like properties** (Carr RA, Congreve M, Murray CW, Rees DC, Drug Discov Today. 2005 Jul 15;10(14):987):

| =< 5 rotatable bonds
| =< 250 Molecular weight
| =< 2.5 octanol/water partition coefficient (log P)

-----

.. class:: infomark

**Input**

| - `SD-Format`_
| - `SMILES Format`_

.. _SD-Format: http://en.wikipedia.org/wiki/Chemical_table_file
.. _SMILES Format: http://en.wikipedia.org/wiki/Simplified_molecular_input_line_entry_specification

-----

.. class:: infomark

**Output**

| SDF formatted coordinates of the molecules, with selected properties stored as meta-data for each compound.
|
| SMILES, InChI or mol2 formatted files containing the 1D strings or 3D coordinates of each compound.

]]>
    </help>
    <expand macro="citations">
        <citation type="doi">10.1186/1752-153X-2-5</citation>
    </expand>
</tool>