Mercurial > repos > bgruening > chemfp
diff mol2fps.xml @ 36:bcb3c078b2b4 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit d786052cd04f8b25eb4aff80b1b9724f62031b61
author | bgruening |
---|---|
date | Sat, 20 May 2017 12:56:09 -0400 |
parents | 73b8c87779ae |
children | 02e03ac072cf |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mol2fps.xml Sat May 20 12:56:09 2017 -0400 @@ -0,0 +1,276 @@ +<tool id="ctb_chemfp_mol2fps" name="Molecules to Fingerprints" version="0.3.0"> + <description>with different fingerprint types</description> + <!--parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" merge_outputs="outfile"></parallelism--> + <requirements> + <requirement type="package" version="1.1p1">chemfp</requirement> + <requirement type="package" version="2016.03.3">rdkit</requirement> + <requirement type="package" version="2.4.1">openbabel</requirement> + </requirements> + <command> +<![CDATA[ + #set $fptype = $fp_opts.fp_opts_selector + + #if $fptype in ['--FP2', '--FP3', '--FP4', '--MACCS']: + ## Open Babel fingerprints + ob2fps $fptype --in '${infile.ext}' '${infile}' -o '${outfile}' + #else: + ## RDKit fingerprints + rdkit2fps --in '${infile.ext}' '${infile}' -o '${outfile}' + #if $fp_opts.fp_opts_selector == "--RDK": + --RDK + --fpSize $fp_opts.fpSize + --minPath $fp_opts.minPath + --maxPath $fp_opts.maxPath + --nBitsPerHash $fp_opts.nBitsPerHash + $fp_opts.useHs + #elif $fp_opts.fp_opts_selector == "--torsions": + --torsions + --fpSize $fp_opts.fpSize + --targetSize $fp_opts.targetSize + #elif $fp_opts.fp_opts_selector == "--morgan": + --morgan + --fpSize $fp_opts.fpSize + --radius $fp_opts.radius + $fp_opts.useFeatures + $fp_opts.useChirality + $fp_opts.useBondTypes + #elif $fp_opts.fp_opts_selector == "--pairs": + --paris + --fpSize $fp_opts.fpSize + --minLength $fp_opts.minLength + --maxLength $fp_opts.maxLength + #elif $fp_opts.fp_opts_selector == "--maccs166": + --maccs166 + #elif $fp_opts.fp_opts_selector == "--substruct": + --substruct + #end if + #end if + --errors report 2>&1 +]]> + </command> + <inputs> + <param name="infile" type='data' format="sdf,smi,mol,mol2,cml,inchi" label="molecule file"/> + <conditional name="fp_opts"> + <param name="fp_opts_selector" type="select" label="Type of fingerprint"> + <option value='--FP2' selected="True">Open Babel FP2 fingerprints</option> + <option value='--FP3'>Open Babel FP3 fingerprints</option> + <option value='--FP4'>Open Babel FP4 fingerprints</option> + <option value='--MACCS'>Open Babel MACCS fingerprints</option> + <option value='--RDK'>RDKit topological fingerprint</option> + <option value='--torsions'>RDKit topological Torsion fingerprints</option> + <option value='--morgan'>RDKit Morgan fingerprints</option> + <option value='--pairs'>RDKit Atom Pair fingerprints</option> + <option value='--maccs166'>RDKit MACCS fingerprints</option> + <option value='--substruct'>RDKit substructure fingerprints</option> + </param> + <when value="--FP2" /> + <when value="--FP3" /> + <when value="--FP4" /> + <when value="--MACCS" /> + <when value="--RDK"> + <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help=""> + <validator type="in_range" min="1" /> + </param> + <param name="minPath" type="integer" value="1" label="minimum number of bonds to include in the subgraph" help=""> + <validator type="in_range" min="1" /> + </param> + <param name="maxPath" type="integer" value="7" label="maximum number of bonds to include in the subgraph" help=""> + <validator type="in_range" min="1" /> + </param> + <param name="nBitsPerHash" type="integer" value="4" label="number of bits to set per path" help=""> + <validator type="in_range" min="1" /> + </param> + <param name="useHs" type="boolean" truevalue="--useHs 1" falsevalue="--useHs 0" checked="true" + label="include information about the number of hydrogens on each atom" /> + </when> + <when value="--torsions"> + <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help=""> + <validator type="in_range" min="1" /> + </param> + <param name="targetSize" type="integer" value="4" label="number of target bits in the fingerprint" help=""> + <validator type="in_range" min="1" /> + </param> + </when> + <when value="--morgan"> + <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help=""> + <validator type="in_range" min="1" /> + </param> + <param name="radius" type="integer" value="2" label="radius for the Morgan algorithm" help=""> + <validator type="in_range" min="1" /> + </param> + <param name="useFeatures" type="boolean" truevalue="--useFeatures 1" falsevalue="--useFeatures 0" checked="false" + label="include information about the number of hydrogens on each atom" /> + <param name="useChirality" type="boolean" truevalue="--useChirality 1" falsevalue="--useChirality 0" checked="false" + label="include information about the number of hydrogens on each atom" /> + <param name="useBondTypes" type="boolean" truevalue="--useBondTypes 1" falsevalue="--useBondTypes 0" checked="true" + label="include information about the number of hydrogens on each atom" /> + </when> + <when value="--pairs"> + <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help=""> + <validator type="in_range" min="1" /> + </param> + <param name="minLength" type="integer" value="1" label="minimum bond count for a pair" help=""> + <validator type="in_range" min="1" /> + </param> + <param name="maxLength" type="integer" value="30" label="maximum bond count for a pair" help=""> + <validator type="in_range" min="1" /> + </param> + </when> + <when value="--maccs166" /> + <when value="--substruct" /> + </conditional> + + </inputs> + <outputs> + <data name="outfile" format="fps" /> + </outputs> + <tests> + <!-- FP2 --> + <test> + <param name="infile" value="CID_2244.sdf" ftype="sdf" /> + <param name="fp_opts.fp_opts_selector" value="--FP2" /> + <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" lines_diff="4"/> + </test> + <test> + <param name="infile" value="CID_2244.smi" ftype="smi" /> + <param name="fp_opts.fp_opts_selector" value="--FP2" /> + <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" lines_diff="4"/> + </test> + <!-- FP3 --> + <test> + <param name="infile" value="CID_2244.sdf" ftype="sdf" /> + <param name="fp_opts.fp_opts_selector" value="--FP3" /> + <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" lines_diff="4"/> + </test> + <test> + <param name="infile" value="CID_2244.smi" ftype="smi" /> + <param name="fp_opts.fp_opts_selector" value="--FP3" /> + <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" lines_diff="4"/> + </test> + <!-- FP4 --> + <test> + <param name="infile" value="CID_2244.sdf" ftype="sdf" /> + <param name="fp_opts.fp_opts_selector" value="--FP4" /> + <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" lines_diff="4"/> + </test> + <test> + <param name="infile" value="CID_2244.smi" ftype="smi" /> + <param name="fp_opts.fp_opts_selector" value="--FP4" /> + <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" lines_diff="4"/> + </test> + <!-- MACCS --> + <test> + <param name="infile" value="CID_2244.sdf" ftype="sdf" /> + <param name="fp_opts.fp_opts_selector" value="--MACCS" /> + <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" lines_diff="4"/> + </test> + <test> + <param name="infile" value="CID_2244.smi" ftype="smi" /> + <param name="fp_opts.fp_opts_selector" value="--MACCS" /> + <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" lines_diff="4"/> + </test> + </tests> + <help> +<![CDATA[ + +.. class:: infomark + +**What this tool does** + +This tool uses chemfp_ to calculate 10 different fingerprints of common file formats. Chemfp uses `Open Babel`_, OpenEye_ and RDKit_. + +For more information check the websites listed below:: + + - http://www.rdkit.org/docs/GettingStartedInPython.html#fingerprinting-and-molecular-similarity + - http://openbabel.org/wiki/Tutorial:Fingerprints + +----- + +.. class:: infomark + +**Input** + +FPS fingerprint file format + +* Example:: + + - SDF File + + 28434379 + -OEChem-02031205132D + + 37 39 0 0 0 0 0 0 0999 V2000 + 8.1648 -1.8842 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0812 -0.2134 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 6.0812 -1.8229 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 2.5369 -2.0182 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + 6.3919 0.7371 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 7.3704 0.9433 0.0000 C 0 0 0 0 + ...... + 1 15 1 0 0 0 0 + 1 35 1 0 0 0 0 + 2 5 1 0 0 0 0 + 2 11 1 0 0 0 0 + 2 12 1 0 0 0 0 + 3 12 2 0 0 0 0 + 3 13 1 0 0 0 0 + 4 18 1 0 0 0 0 + ...... + + >PUBCHEM_COMPOUND_CID< + 28434379 + + > <PUBCHEM_COMPOUND_CANONICALIZED> + 1 + + > <PUBCHEM_CACTVS_COMPLEXITY> + 280 + + > <PUBCHEM_CACTVS_HBOND_ACCEPTOR> + 2 + + > <PUBCHEM_CACTVS_HBOND_DONOR> + 2 + + > <PUBCHEM_CACTVS_ROTATABLE_BOND> + 2 + + > <PUBCHEM_CACTVS_SUBSKEYS> + AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA== + + > + + - type : FP2 + +----- + +.. class:: infomark + +**Output** + +* Example:: + + #FPS1 + #num_bits=1021 + #type=OpenBabel-FP2/1 + #software=OpenBabel/2.3.0 + #source=/tmp/dataset_409.dat.sdf + #date=2012-02-03T11:13:39 + c0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c + 0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300 + 10000000000080000000c0000060000c0000060810000010000000800102000000 28434379 + + +]]> + </help> + <citations> + <citation type="doi">10.1186/1758-2946-3-33</citation> + <citation type="doi">10.1186/1758-2946-5-S1-P36</citation> + <citation type="bibtex"> + @electronic{rdkit, + title = {RDKit: Open-source cheminformatics}, + url ={http://www.rdkit.org} + } + </citation> + </citations> +</tool>