9
|
1 <tool id="ctb_chemfp_mol2fps" name="Molecules to Fingerprints" version="0.2.0">
|
|
2 <description>with different fingerprint types</description>
|
|
3 <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism>
|
|
4 <requirements>
|
|
5 <requirement type="package" version="1.1p1">chemfp</requirement>
|
|
6 <requirement type="package" version="2012_12_1">rdkit</requirement>
|
|
7 </requirements>
|
|
<!--
  Command template (Cheetah).
  Open Babel fingerprint types (FP2, FP3, FP4, MACCS) are generated with
  ob2fps; every other selector value is generated with rdkit2fps, followed by
  the options that belong to the chosen RDKit fingerprint type.
  The template is wrapped in CDATA so the shell redirection "2>&1" can be
  written literally without XML escaping.
-->
<command><![CDATA[
    #set $fptype = $fp_opts.fp_opts_selector
    #if $fptype in ['--FP2', '--FP3', '--FP4', '--MACCS']:
        ## Open Babel fingerprints: the selector value is itself the ob2fps flag
        ob2fps $fptype --in "${infile.ext}" "${infile}" -o "${outfile}"
    #else:
        ## RDKit fingerprints: the type flag and its options are appended below
        rdkit2fps --in "${infile.ext}" "${infile}" -o "${outfile}"
        #if $fp_opts.fp_opts_selector == "--RDK":
            --RDK
            --fpSize $fp_opts.fpSize
            --minPath $fp_opts.minPath
            --maxPath $fp_opts.maxPath
            --nBitsPerHash $fp_opts.nBitsPerHash
            ## boolean param expands to "--useHs 1" or "--useHs 0"
            $fp_opts.useHs
        #elif $fp_opts.fp_opts_selector == "--torsions":
            --torsions
            --fpSize $fp_opts.fpSize
            --targetSize $fp_opts.targetSize
        #elif $fp_opts.fp_opts_selector == "--morgan":
            --morgan
            --fpSize $fp_opts.fpSize
            --radius $fp_opts.radius
            ## boolean params expand to "--<name> 1" or "--<name> 0"
            $fp_opts.useFeatures
            $fp_opts.useChirality
            $fp_opts.useBondTypes
        #elif $fp_opts.fp_opts_selector == "--pairs":
            ## the flag must be spelled --pairs (it was previously misspelled "--paris")
            --pairs
            --fpSize $fp_opts.fpSize
            --minLength $fp_opts.minLength
            --maxLength $fp_opts.maxLength
        #elif $fp_opts.fp_opts_selector == "--maccs166":
            --maccs166
        #elif $fp_opts.fp_opts_selector == "--substruct":
            --substruct
        #end if
    #end if
    ## applies to both ob2fps and rdkit2fps; stderr is merged into stdout
    --errors report 2>&1
]]></command>
|
|
<!--
  Tool inputs: one molecule file plus a conditional whose selector picks the
  fingerprint type. The selector values mirror the command-line flag names
  used in the command template. Only the RDK, torsions, morgan and pairs
  types expose extra parameters; the remaining types take none.
-->
<inputs>
    <param name="infile" type="data" format="sdf,smi,mol,mol2,cml,inchi" label="molecule file"/>
    <conditional name="fp_opts">
        <param name="fp_opts_selector" type="select" label="Type of fingerprint">
            <option value="--FP2" selected="True">Open Babel FP2 fingerprints</option>
            <option value="--FP3">Open Babel FP3 fingerprints</option>
            <option value="--FP4">Open Babel FP4 fingerprints</option>
            <option value="--MACCS">Open Babel MACCS fingerprints</option>
            <option value="--RDK">RDKit topological fingerprint</option>
            <option value="--torsions">RDKit topological Torsion fingerprints</option>
            <option value="--morgan">RDKit Morgan fingerprints</option>
            <option value="--pairs">RDKit Atom Pair fingerprints</option>
            <option value="--maccs166">RDKit MACCS fingerprints</option>
            <option value="--substruct">RDKit substructure fingerprints</option>
        </param>

        <!-- Open Babel fingerprint types: no additional options -->
        <when value="--FP2" />
        <when value="--FP3" />
        <when value="--FP4" />
        <when value="--MACCS" />

        <!-- RDKit topological fingerprint -->
        <when value="--RDK">
            <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
                <validator type="in_range" min="1" />
            </param>
            <param name="minPath" type="integer" value="1" label="minimum number of bonds to include in the subgraph" help="">
                <validator type="in_range" min="1" />
            </param>
            <param name="maxPath" type="integer" value="7" label="maximum number of bonds to include in the subgraph" help="">
                <validator type="in_range" min="1" />
            </param>
            <param name="nBitsPerHash" type="integer" value="4" label="number of bits to set per path" help="">
                <validator type="in_range" min="1" />
            </param>
            <param name="useHs" type="boolean" label="include information about the number of hydrogens on each atom" truevalue="--useHs 1" falsevalue="--useHs 0" checked="true" />
        </when>

        <!-- RDKit topological torsion fingerprint -->
        <when value="--torsions">
            <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
                <validator type="in_range" min="1" />
            </param>
            <param name="targetSize" type="integer" value="4" label="number of target bits in the fingerprint" help="">
                <validator type="in_range" min="1" />
            </param>
        </when>

        <!-- RDKit Morgan fingerprint -->
        <when value="--morgan">
            <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
                <validator type="in_range" min="1" />
            </param>
            <param name="radius" type="integer" value="2" label="radius for the Morgan algorithm" help="">
                <validator type="in_range" min="1" />
            </param>
            <!-- Each boolean gets its own label; they previously all repeated the useHs hydrogen-count label. -->
            <param name="useFeatures" type="boolean" label="use chemical-feature invariants" truevalue="--useFeatures 1" falsevalue="--useFeatures 0" checked="false" />
            <param name="useChirality" type="boolean" label="include chirality information" truevalue="--useChirality 1" falsevalue="--useChirality 0" checked="false" />
            <param name="useBondTypes" type="boolean" label="include bond type information" truevalue="--useBondTypes 1" falsevalue="--useBondTypes 0" checked="true" />
        </when>

        <!-- RDKit atom pair fingerprint -->
        <when value="--pairs">
            <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
                <validator type="in_range" min="1" />
            </param>
            <param name="minLength" type="integer" value="1" label="minimum bond count for a pair" help="">
                <validator type="in_range" min="1" />
            </param>
            <param name="maxLength" type="integer" value="30" label="maximum bond count for a pair" help="">
                <validator type="in_range" min="1" />
            </param>
        </when>

        <!-- RDKit MACCS and substructure fingerprints: no additional options -->
        <when value="--maccs166" />
        <when value="--substruct" />
    </conditional>
</inputs>
|
|
<!-- Single output dataset in "fps" format; the command template writes to it via -o "${outfile}". -->
<outputs>
    <data name="outfile" format="fps" />
</outputs>
|
|
<!--
  Functional tests. Each Open Babel fingerprint type is run twice, once on the
  SDF and once on the SMILES version of CID_2244, and both runs are compared
  against the same expected .fps file.
  NOTE(review): no RDKit fingerprint type is covered by a test here.
-->
<tests>
    <!-- FP2 -->
    <test>
        <param name="infile" value="CID_2244.sdf" ftype="sdf" />
        <param name="fp_opts.fp_opts_selector" value="--FP2" />
        <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" />
    </test>
    <test>
        <param name="infile" value="CID_2244.smi" ftype="smi" />
        <param name="fp_opts.fp_opts_selector" value="--FP2" />
        <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" />
    </test>

    <!-- FP3 -->
    <test>
        <param name="infile" value="CID_2244.sdf" ftype="sdf" />
        <param name="fp_opts.fp_opts_selector" value="--FP3" />
        <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" />
    </test>
    <test>
        <param name="infile" value="CID_2244.smi" ftype="smi" />
        <param name="fp_opts.fp_opts_selector" value="--FP3" />
        <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" />
    </test>

    <!-- FP4 -->
    <test>
        <param name="infile" value="CID_2244.sdf" ftype="sdf" />
        <param name="fp_opts.fp_opts_selector" value="--FP4" />
        <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" />
    </test>
    <test>
        <param name="infile" value="CID_2244.smi" ftype="smi" />
        <param name="fp_opts.fp_opts_selector" value="--FP4" />
        <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" />
    </test>

    <!-- MACCS -->
    <test>
        <param name="infile" value="CID_2244.sdf" ftype="sdf" />
        <param name="fp_opts.fp_opts_selector" value="--MACCS" />
        <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" />
    </test>
    <test>
        <param name="infile" value="CID_2244.smi" ftype="smi" />
        <param name="fp_opts.fp_opts_selector" value="--MACCS" />
        <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" />
    </test>
</tests>
|
|
166 <help>
|
|
167
|
|
168
|
|
169 **What it does**
|
|
170
|
|
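171 Generate molecular fingerprints from a molecule file with chemfp, using either Open Babel (FP2, FP3, FP4, MACCS) or RDKit (topological, torsion, Morgan, atom pair, MACCS, substructure) fingerprint types.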
|
|
172
|
|
173 -----
|
|
174
|
|
175 **Example**
|
|
176
|
|
177 * input::
|
|
178
|
|
179 - SDF File
|
|
180
|
|
181 28434379
|
|
182 -OEChem-02031205132D
|
|
183
|
|
184 37 39 0 0 0 0 0 0 0999 V2000
|
|
185 8.1648 -1.8842 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
|
|
186 6.0812 -0.2134 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
|
|
187 6.0812 -1.8229 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
|
|
188 2.5369 -2.0182 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
|
|
189 6.3919 0.7371 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
190 7.3704 0.9433 0.0000 C 0 0 0 0
|
|
191 ......
|
|
192 1 15 1 0 0 0 0
|
|
193 1 35 1 0 0 0 0
|
|
194 2 5 1 0 0 0 0
|
|
195 2 11 1 0 0 0 0
|
|
196 2 12 1 0 0 0 0
|
|
197 3 12 2 0 0 0 0
|
|
198 3 13 1 0 0 0 0
|
|
199 4 18 1 0 0 0 0
|
|
200 ......
|
|
201
|
|
202 > <PUBCHEM_COMPOUND_CID>
|
|
203 28434379
|
|
204
|
|
205 > <PUBCHEM_COMPOUND_CANONICALIZED>
|
|
206 1
|
|
207
|
|
208 > <PUBCHEM_CACTVS_COMPLEXITY>
|
|
209 280
|
|
210
|
|
211 > <PUBCHEM_CACTVS_HBOND_ACCEPTOR>
|
|
212 2
|
|
213
|
|
214 > <PUBCHEM_CACTVS_HBOND_DONOR>
|
|
215 2
|
|
216
|
|
217 > <PUBCHEM_CACTVS_ROTATABLE_BOND>
|
|
218 2
|
|
219
|
|
220 > <PUBCHEM_CACTVS_SUBSKEYS>
|
|
221 AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==
|
|
222
|
|
223 >
|
|
224
|
|
225 - type : FP2
|
|
226
|
|
227 * output::
|
|
228
|
|
229 #FPS1
|
|
230 #num_bits=1021
|
|
231 #type=OpenBabel-FP2/1
|
|
232 #software=OpenBabel/2.3.0
|
|
233 #source=/tmp/dataset_409.dat.sdf
|
|
234 #date=2012-02-03T11:13:39
|
|
235 c0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c
|
|
236 0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300
|
|
237 10000000000080000000c0000060000c0000060810000010000000800102000000 28434379
|
|
238
|
|
239
|
|
240 </help>
|
|
241 </tool>
|