diff mol2fps.xml @ 36:bcb3c078b2b4 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/chemfp commit d786052cd04f8b25eb4aff80b1b9724f62031b61
author bgruening
date Sat, 20 May 2017 12:56:09 -0400
parents 73b8c87779ae
children 02e03ac072cf
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mol2fps.xml	Sat May 20 12:56:09 2017 -0400
@@ -0,0 +1,276 @@
+<tool id="ctb_chemfp_mol2fps" name="Molecules to Fingerprints" version="0.3.0">
+    <description>with different fingerprint types</description>
+    <!--parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" merge_outputs="outfile"></parallelism-->
+    <requirements>
+        <requirement type="package" version="1.1p1">chemfp</requirement>
+        <requirement type="package" version="2016.03.3">rdkit</requirement>
+        <requirement type="package" version="2.4.1">openbabel</requirement>
+    </requirements>
+    <command>
+<![CDATA[
+        #set $fptype = $fp_opts.fp_opts_selector
+        ## Every selector value is spelled exactly like the ob2fps/rdkit2fps flag of that fingerprint type.
+        #if $fptype in ['--FP2', '--FP3', '--FP4', '--MACCS']:
+            ## Open Babel fingerprints: the selector value is passed straight through as the type flag
+            ob2fps $fptype --in '${infile.ext}' '${infile}' -o '${outfile}'
+        #else:
+            ## RDKit fingerprints: the type flag and its per-type options below are appended to this call
+            rdkit2fps --in '${infile.ext}' '${infile}' -o '${outfile}'
+            #if $fp_opts.fp_opts_selector == "--RDK":
+                --RDK
+                --fpSize $fp_opts.fpSize
+                --minPath $fp_opts.minPath
+                --maxPath $fp_opts.maxPath
+                --nBitsPerHash $fp_opts.nBitsPerHash
+                $fp_opts.useHs
+            #elif $fp_opts.fp_opts_selector == "--torsions":
+                --torsions
+                --fpSize $fp_opts.fpSize
+                --targetSize $fp_opts.targetSize
+            #elif $fp_opts.fp_opts_selector == "--morgan":
+                --morgan
+                --fpSize $fp_opts.fpSize
+                --radius $fp_opts.radius
+                $fp_opts.useFeatures
+                $fp_opts.useChirality
+                $fp_opts.useBondTypes
+            #elif $fp_opts.fp_opts_selector == "--pairs":
+                --pairs
+                --fpSize $fp_opts.fpSize
+                --minLength $fp_opts.minLength
+                --maxLength $fp_opts.maxLength
+            #elif $fp_opts.fp_opts_selector == "--maccs166":
+                --maccs166
+            #elif $fp_opts.fp_opts_selector == "--substruct":
+                --substruct
+            #end if
+        #end if
+        --errors report 2>&1
+]]>
+    </command>
+    <inputs>
+        <param name="infile" type='data' format="sdf,smi,mol,mol2,cml,inchi" label="molecule file"/>
+        <conditional name="fp_opts">
+            <param name="fp_opts_selector" type="select" label="Type of fingerprint">
+                <option value='--FP2' selected="True">Open Babel FP2 fingerprints</option>
+                <option value='--FP3'>Open Babel FP3 fingerprints</option>
+                <option value='--FP4'>Open Babel FP4 fingerprints</option>
+                <option value='--MACCS'>Open Babel MACCS fingerprints</option>
+                <option value='--RDK'>RDKit topological fingerprint</option>
+                <option value='--torsions'>RDKit topological Torsion fingerprints</option>
+                <option value='--morgan'>RDKit Morgan fingerprints</option>
+                <option value='--pairs'>RDKit Atom Pair fingerprints</option>
+                <option value='--maccs166'>RDKit MACCS fingerprints</option>
+                <option value='--substruct'>RDKit substructure fingerprints</option>
+            </param>
+            <when value="--FP2" />
+            <when value="--FP3" />
+            <when value="--FP4" />
+            <when value="--MACCS" />
+            <when value="--RDK">
+                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+                <param name="minPath" type="integer" value="1" label="minimum number of bonds to include in the subgraph" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+                <param name="maxPath" type="integer" value="7" label="maximum number of bonds to include in the subgraph" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+                <param name="nBitsPerHash" type="integer" value="4" label="number of bits to set per path" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+                <param name="useHs" type="boolean" truevalue="--useHs 1" falsevalue="--useHs 0" checked="true"
+                    label="include information about the number of hydrogens on each atom" />
+            </when>
+            <when value="--torsions">
+                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+                <param name="targetSize" type="integer" value="4" label="number of target bits in the fingerprint" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+            </when>
+            <when value="--morgan">
+                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+                <param name="radius" type="integer" value="2" label="radius for the Morgan algorithm" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+                <param name="useFeatures" type="boolean" truevalue="--useFeatures 1" falsevalue="--useFeatures 0" checked="false"
+                    label="use chemical-feature invariants" />
+                <param name="useChirality" type="boolean" truevalue="--useChirality 1" falsevalue="--useChirality 0" checked="false"
+                    label="include chirality information" />
+                <param name="useBondTypes" type="boolean" truevalue="--useBondTypes 1" falsevalue="--useBondTypes 0" checked="true"
+                    label="consider bond type information" />
+            </when>
+            <when value="--pairs">
+                <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+                <param name="minLength" type="integer" value="1" label="minimum bond count for a pair" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+                <param name="maxLength" type="integer" value="30" label="maximum bond count for a pair" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+            </when>
+            <when value="--maccs166" />
+            <when value="--substruct" />
+        </conditional>
+        <!-- Each boolean above carries its complete command-line fragment (flag plus 0 or 1) in truevalue/falsevalue. -->
+    </inputs>
+    <outputs>
+        <data name="outfile" format="fps" /> <!-- single dataset; the command passes it to ob2fps/rdkit2fps via -o -->
+    </outputs>
+    <tests>
+        <!-- Open Babel FP2: SDF and SMILES inputs are both compared against the same reference file; lines_diff="4" presumably absorbs run-specific FPS header lines (TODO confirm) -->
+        <test>
+            <param name="infile" value="CID_2244.sdf" ftype="sdf" />
+            <param name="fp_opts.fp_opts_selector" value="--FP2" />
+            <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" lines_diff="4"/>
+        </test>
+        <test>
+            <param name="infile" value="CID_2244.smi" ftype="smi" />
+            <param name="fp_opts.fp_opts_selector" value="--FP2" />
+            <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" lines_diff="4"/>
+        </test>
+        <!-- Open Babel FP3: same SDF/SMILES input pair, compared against CID_2244_FP3.fps -->
+        <test>
+            <param name="infile" value="CID_2244.sdf" ftype="sdf" />
+            <param name="fp_opts.fp_opts_selector" value="--FP3" />
+            <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" lines_diff="4"/>
+        </test>
+        <test>
+            <param name="infile" value="CID_2244.smi" ftype="smi" />
+            <param name="fp_opts.fp_opts_selector" value="--FP3" />
+            <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" lines_diff="4"/>
+        </test>
+        <!-- Open Babel FP4: same SDF/SMILES input pair, compared against CID_2244_FP4.fps -->
+        <test>
+            <param name="infile" value="CID_2244.sdf" ftype="sdf" />
+            <param name="fp_opts.fp_opts_selector" value="--FP4" />
+            <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" lines_diff="4"/>
+        </test>
+        <test>
+            <param name="infile" value="CID_2244.smi" ftype="smi" />
+            <param name="fp_opts.fp_opts_selector" value="--FP4" />
+            <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" lines_diff="4"/>
+        </test>
+        <!-- Open Babel MACCS: same input pair. NOTE(review): none of the RDKit fingerprint types is exercised by the tests in this section -->
+        <test>
+            <param name="infile" value="CID_2244.sdf" ftype="sdf" />
+            <param name="fp_opts.fp_opts_selector" value="--MACCS" />
+            <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" lines_diff="4"/>
+        </test>
+        <test>
+            <param name="infile" value="CID_2244.smi" ftype="smi" />
+            <param name="fp_opts.fp_opts_selector" value="--MACCS" />
+            <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" lines_diff="4"/>
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+
+.. class:: infomark
+
+**What this tool does**
+
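+This tool uses `chemfp <https://chemfp.com/>`_ to calculate 10 different fingerprint types from molecules in common file formats. Chemfp uses `Open Babel <http://openbabel.org/>`_, `OpenEye <https://www.eyesopen.com/>`_ and `RDKit <http://www.rdkit.org/>`_.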
+
+For more information check the websites listed below::
+
+	- http://www.rdkit.org/docs/GettingStartedInPython.html#fingerprinting-and-molecular-similarity
+	- http://openbabel.org/wiki/Tutorial:Fingerprints
+
+-----
+
+.. class:: infomark
+
+**Input**
+
+A molecule file in one of the supported formats (SDF, SMILES, MOL, MOL2, CML or InChI) and the fingerprint type to calculate.
+
+* Example::
+
+	      - SDF File
+
+		28434379
+		  -OEChem-02031205132D
+
+		 37 39  0     0  0  0  0  0  0999 V2000
+		    8.1648   -1.8842    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
+		    6.0812   -0.2134    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+		    6.0812   -1.8229    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+		    2.5369   -2.0182    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
+		    6.3919    0.7371    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
+		    7.3704    0.9433    0.0000 C   0  0  0  0
+		    ......
+		  1 15  1  0  0  0  0
+		  1 35  1  0  0  0  0
+		  2  5  1  0  0  0  0
+		  2 11  1  0  0  0  0
+		  2 12  1  0  0  0  0
+		  3 12  2  0  0  0  0
+		  3 13  1  0  0  0  0
+		  4 18  1  0  0  0  0
+		  ......
+
+			>PUBCHEM_COMPOUND_CID<
+			28434379
+
+			> <PUBCHEM_COMPOUND_CANONICALIZED>
+			1
+
+			> <PUBCHEM_CACTVS_COMPLEXITY>
+			280
+
+			> <PUBCHEM_CACTVS_HBOND_ACCEPTOR>
+			2
+
+			> <PUBCHEM_CACTVS_HBOND_DONOR>
+			2
+
+			> <PUBCHEM_CACTVS_ROTATABLE_BOND>
+			2
+
+			> <PUBCHEM_CACTVS_SUBSKEYS>
+			AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==
+
+			>
+
+		- type : FP2
+
+-----
+
+.. class:: infomark
+
+**Output**
+
+* Example::
+
+	#FPS1
+	#num_bits=1021
+	#type=OpenBabel-FP2/1
+	#software=OpenBabel/2.3.0
+	#source=/tmp/dataset_409.dat.sdf
+	#date=2012-02-03T11:13:39
+	c0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c
+	0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300
+	10000000000080000000c0000060000c0000060810000010000000800102000000	28434379
+
+
+]]>
+    </help>
+    <citations> <!-- two DOI citations plus a BibTeX entry for RDKit -->
+        <citation type="doi">10.1186/1758-2946-3-33</citation>
+        <citation type="doi">10.1186/1758-2946-5-S1-P36</citation>
+        <citation type="bibtex">
+            @electronic{rdkit,
+                title = {RDKit: Open-source cheminformatics},
+                url ={http://www.rdkit.org}
+            }
+        </citation>
+    </citations>
+</tool>