6
|
1 <tool id="chemfp_mol2fps" name="Molecules to Fingerprints" version="0.2.0">
|
0
|
2 <description>with different fingerprint types</description>
|
|
3 <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism>
|
|
4 <requirements>
|
|
5 <requirement type="package" version="1.1p1">chemfp</requirement>
|
6
|
6 <requirement type="package" version="2012_12_1">rdkit</requirement>
|
0
|
7 </requirements>
|
|
<command><![CDATA[
    ## Builds the chemfp command line.
    ## The fingerprint type is the select inside the "fp_opts" conditional, so it
    ## has to be addressed as $fp_opts.fp_opts_selector; the tool declares no
    ## top-level "fptype" parameter.
    ## stderr is folded into stdout (2>&1) on both branches, as in the original.
    #if str($fp_opts.fp_opts_selector) in ['--FP2', '--FP3', '--FP4', '--MACCS']:
        ## Open Babel fingerprints: the selector value is itself the ob2fps flag.
        ob2fps $fp_opts.fp_opts_selector --in "${infile.ext}" "${infile}" -o "${outfile}" --errors report 2>&1
    #else:
        ## RDKit fingerprints: emit the type flag plus the options that exist
        ## in the matching <when> branch of the "fp_opts" conditional.
        rdkit2fps --in "${infile.ext}" "${infile}" -o "${outfile}"
        #if $fp_opts.fp_opts_selector=="--RDK":
            --RDK
            --fpSize $fp_opts.fpSize
            --minPath $fp_opts.minPath
            --maxPath $fp_opts.maxPath
            --nBitsPerHash $fp_opts.nBitsPerHash
            ## boolean param expands to "--useHs 1" or "--useHs 0"
            $fp_opts.useHs
        #elif $fp_opts.fp_opts_selector=="--torsions":
            --torsions
            --fpSize $fp_opts.fpSize
            --targetSize $fp_opts.targetSize
        #elif $fp_opts.fp_opts_selector=="--morgan":
            --morgan
            --fpSize $fp_opts.fpSize
            --radius $fp_opts.radius
            ## boolean params expand to "--<flag> 1" or "--<flag> 0"
            $fp_opts.useFeatures
            $fp_opts.useChirality
            $fp_opts.useBondTypes
        #elif $fp_opts.fp_opts_selector=="--pairs":
            ## was misspelled "--paris", which rdkit2fps does not accept
            --pairs
            --fpSize $fp_opts.fpSize
            --minLength $fp_opts.minLength
            --maxLength $fp_opts.maxLength
        #elif $fp_opts.fp_opts_selector=="--maccs166":
            --maccs166
        #elif $fp_opts.fp_opts_selector=="--substruct":
            --substruct
        #end if
        --errors report 2>&1
    #end if
]]></command>
|
|
46 <inputs>
|
|
47 <param name="infile" type='data' format="sdf,smi,mol,mol2,cml,inchi" label="molecule file"/>
|
6
|
48 <conditional name="fp_opts">
|
|
49 <param name="fp_opts_selector" type="select" label="Type of fingerprint">
|
|
50 <option value='--FP2' selected="True">Open Babel FP2 fingerprints</option>
|
|
51 <option value='--FP3'>Open Babel FP3 fingerprints</option>
|
|
52 <option value='--FP4'>Open Babel FP4 fingerprints</option>
|
|
53 <option value='--MACCS'>Open Babel MACCS fingerprints</option>
|
|
54 <option value='--RDK'>RDKit topological fingerprint</option>
|
|
55 <option value='--torsions'>RDKit topological Torsion fingerprints</option>
|
|
56 <option value='--morgan'>RDKit Morgan fingerprints</option>
|
|
57 <option value='--pairs'>RDKit Atom Pair fingerprints</option>
|
|
58 <option value='--maccs166'>RDKit MACCS fingerprints</option>
|
|
59 <option value='--substruct'>RDKit substructure fingerprints</option>
|
|
60 </param>
|
|
61 <when value="--FP2" />
|
|
62 <when value="--FP3" />
|
|
63 <when value="--FP4" />
|
|
64 <when value="--MACCS" />
|
|
65 <when value="--RDK">
|
|
66 <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
|
|
67 <validator type="in_range" min="1" />
|
|
68 </param>
|
|
69 <param name="minPath" type="integer" value="1" label="minimum number of bonds to include in the subgraph" help="">
|
|
70 <validator type="in_range" min="1" />
|
|
71 </param>
|
|
72 <param name="maxPath" type="integer" value="7" label="maximum number of bonds to include in the subgraph" help="">
|
|
73 <validator type="in_range" min="1" />
|
|
74 </param>
|
|
75 <param name="nBitsPerHash" type="integer" value="4" label="number of bits to set per path" help="">
|
|
76 <validator type="in_range" min="1" />
|
|
77 </param>
|
|
78 <param name="useHs" type="boolean" label="include information about the number of hydrogens on each atom" truevalue="--useHs 1" falsevalue="--useHs 0" checked="true" />
|
|
79 </when>
|
|
80 <when value="--torsions">
|
|
81 <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
|
|
82 <validator type="in_range" min="1" />
|
|
83 </param>
|
|
84 <param name="targetSize" type="integer" value="4" label="number of target bits in the fingerprint" help="">
|
|
85 <validator type="in_range" min="1" />
|
|
86 </param>
|
|
87 </when>
|
|
<when value="--morgan">
    <!-- Options passed to "rdkit2fps --morgan". Each boolean expands to the
         complete "--flag 0|1" argument that the command template emits. -->
    <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
        <validator type="in_range" min="1" />
    </param>
    <param name="radius" type="integer" value="2" label="radius for the Morgan algorithm" help="">
        <validator type="in_range" min="1" />
    </param>
    <!-- The three labels below used to repeat the useHs text about hydrogen counts. -->
    <param name="useFeatures" type="boolean" label="use chemical-feature invariants" truevalue="--useFeatures 1" falsevalue="--useFeatures 0" checked="false" />
    <param name="useChirality" type="boolean" label="include chirality information" truevalue="--useChirality 1" falsevalue="--useChirality 0" checked="false" />
    <param name="useBondTypes" type="boolean" label="consider bond type information" truevalue="--useBondTypes 1" falsevalue="--useBondTypes 0" checked="true" />
</when>
|
|
99 <when value="--pairs">
|
|
100 <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help="">
|
|
101 <validator type="in_range" min="1" />
|
|
102 </param>
|
|
103 <param name="minLength" type="integer" value="1" label="minimum bond count for a pair" help="">
|
|
104 <validator type="in_range" min="1" />
|
|
105 </param>
|
|
106 <param name="maxLength" type="integer" value="30" label="maximum bond count for a pair" help="">
|
|
107 <validator type="in_range" min="1" />
|
|
108 </param>
|
|
109 </when>
|
|
110 <when value="--maccs166" />
|
|
111 <when value="--substruct" />
|
|
112 </conditional>
|
|
113
|
0
|
114 </inputs>
|
|
115 <outputs>
|
|
116 <data name="outfile" format="fps" />
|
|
117 </outputs>
|
|
<tests>
    <!-- Each Open Babel fingerprint type is generated from both the SDF and the
         SMILES form of the same molecule and compared against one expected file.
         The fingerprint type is chosen through the "fp_opts_selector" select of
         the "fp_opts" conditional; the tool has no "fptype" parameter. -->
    <!-- FP2 -->
    <test>
        <param name="infile" value="CID_2244.sdf" ftype="sdf" />
        <param name="fp_opts_selector" value="--FP2" />
        <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" />
    </test>
    <test>
        <param name="infile" value="CID_2244.smi" ftype="smi" />
        <param name="fp_opts_selector" value="--FP2" />
        <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" />
    </test>
    <!-- FP3 -->
    <test>
        <param name="infile" value="CID_2244.sdf" ftype="sdf" />
        <param name="fp_opts_selector" value="--FP3" />
        <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" />
    </test>
    <test>
        <param name="infile" value="CID_2244.smi" ftype="smi" />
        <param name="fp_opts_selector" value="--FP3" />
        <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" />
    </test>
    <!-- FP4 -->
    <test>
        <param name="infile" value="CID_2244.sdf" ftype="sdf" />
        <param name="fp_opts_selector" value="--FP4" />
        <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" />
    </test>
    <test>
        <param name="infile" value="CID_2244.smi" ftype="smi" />
        <param name="fp_opts_selector" value="--FP4" />
        <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" />
    </test>
    <!-- MACCS -->
    <test>
        <param name="infile" value="CID_2244.sdf" ftype="sdf" />
        <param name="fp_opts_selector" value="--MACCS" />
        <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" />
    </test>
    <test>
        <param name="infile" value="CID_2244.smi" ftype="smi" />
        <param name="fp_opts_selector" value="--MACCS" />
        <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" />
    </test>
</tests>
|
|
164 <help>
|
|
165
|
|
166
|
|
167 **What it does**
|
|
168
|
|
169 Generate fingerprints with chemfp, using either Open Babel or RDKit depending on the selected fingerprint type.
|
|
170
|
|
171 -----
|
|
172
|
|
173 **Example**
|
|
174
|
|
175 * input::
|
|
176
|
|
177 - SDF File
|
|
178
|
|
179 28434379
|
|
180 -OEChem-02031205132D
|
|
181
|
|
182 37 39 0 0 0 0 0 0 0999 V2000
|
|
183 8.1648 -1.8842 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
|
|
184 6.0812 -0.2134 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
|
|
185 6.0812 -1.8229 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
|
|
186 2.5369 -2.0182 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
|
|
187 6.3919 0.7371 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
|
|
188 7.3704 0.9433 0.0000 C 0 0 0 0
|
|
189 ......
|
|
190 1 15 1 0 0 0 0
|
|
191 1 35 1 0 0 0 0
|
|
192 2 5 1 0 0 0 0
|
|
193 2 11 1 0 0 0 0
|
|
194 2 12 1 0 0 0 0
|
|
195 3 12 2 0 0 0 0
|
|
196 3 13 1 0 0 0 0
|
|
197 4 18 1 0 0 0 0
|
|
198 ......
|
|
199
|
|
200 > <PUBCHEM_COMPOUND_CID>
|
|
201 28434379
|
|
202
|
|
203 > <PUBCHEM_COMPOUND_CANONICALIZED>
|
|
204 1
|
|
205
|
|
206 > <PUBCHEM_CACTVS_COMPLEXITY>
|
|
207 280
|
|
208
|
|
209 > <PUBCHEM_CACTVS_HBOND_ACCEPTOR>
|
|
210 2
|
|
211
|
|
212 > <PUBCHEM_CACTVS_HBOND_DONOR>
|
|
213 2
|
|
214
|
|
215 > <PUBCHEM_CACTVS_ROTATABLE_BOND>
|
|
216 2
|
|
217
|
|
218 > <PUBCHEM_CACTVS_SUBSKEYS>
|
|
219 AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA==
|
|
220
|
|
221 >
|
|
222
|
|
223 - type : FP2
|
|
224
|
|
225 * output::
|
|
226
|
|
227 #FPS1
|
|
228 #num_bits=1021
|
|
229 #type=OpenBabel-FP2/1
|
|
230 #software=OpenBabel/2.3.0
|
|
231 #source=/tmp/dataset_409.dat.sdf
|
|
232 #date=2012-02-03T11:13:39
|
|
233 c0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c
|
|
234 0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300
|
|
235 10000000000080000000c0000060000c0000060810000010000000800102000000 28434379
|
|
236
|
|
237
|
|
238 </help>
|
|
239 </tool>
|