Mercurial > repos > bgruening > chemfp
annotate chemfp_mol2fps/mol2fps.xml @ 24:dce673edc031
ChemicalToolBoX update.
author | Bjoern Gruening <bjoern.gruening@gmail.com> |
---|---|
date | Sat, 20 Jul 2013 19:07:47 +0200 |
parents | 1868005213a1 |
children | a2ba3431eca4 |
rev | line source |
---|---|
9 | 1 <tool id="ctb_chemfp_mol2fps" name="Molecules to Fingerprints" version="0.2.0"> |
2 <description>with different fingerprint types</description> | |
3 <parallelism method="multi" split_inputs="infile" split_mode="to_size" split_size="10000" shared_inputs="" merge_outputs="outfile"></parallelism> | |
4 <requirements> | |
5 <requirement type="package" version="1.1p1">chemfp</requirement> | |
6 <requirement type="package" version="2012_12_1">rdkit</requirement> | |
20
21d29a7f13d8
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
13
diff
changeset
|
7 <requirement type="package" version="2.3.2">openbabel</requirement> |
9 | 8 </requirements> |
9 <command> | |
13 | 10 #set $fptype = $fp_opts.fp_opts_selector |
11 | |
9 | 12 #if $fptype in ['--FP2', '--FP3', '--FP4', '--MACCS']: |
13 ## Open Babel fingerprints: ob2fps takes the selected type flag directly, no extra options | |
13 | 14 ob2fps $fptype --in "${infile.ext}" "${infile}" -o "${outfile}" |
9 | 15 #else: |
16 ## RDKit fingerprints: rdkit2fps call, followed by the type flag and its type-specific options | |
17 rdkit2fps --in "${infile.ext}" "${infile}" -o "${outfile}" | |
13 | 18 #if $fp_opts.fp_opts_selector == "--RDK": |
9 | 19 --RDK |
20 --fpSize $fp_opts.fpSize | |
21 --minPath $fp_opts.minPath | |
22 --maxPath $fp_opts.maxPath | |
23 --nBitsPerHash $fp_opts.nBitsPerHash | |
24 $fp_opts.useHs | |
13 | 25 #elif $fp_opts.fp_opts_selector == "--torsions": |
9 | 26 --torsions |
27 --fpSize $fp_opts.fpSize | |
28 --targetSize $fp_opts.targetSize | |
13 | 29 #elif $fp_opts.fp_opts_selector == "--morgan": |
9 | 30 --morgan |
31 --fpSize $fp_opts.fpSize | |
32 --radius $fp_opts.radius | |
33 $fp_opts.useFeatures | |
34 $fp_opts.useChirality | |
35 $fp_opts.useBondTypes | |
13 | 36 #elif $fp_opts.fp_opts_selector == "--pairs": |
9 | 37 --pairs |
38 --fpSize $fp_opts.fpSize | |
39 --minLength $fp_opts.minLength | |
40 --maxLength $fp_opts.maxLength | |
13 | 41 #elif $fp_opts.fp_opts_selector == "--maccs166": |
9 | 42 --maccs166 |
13 | 43 #elif $fp_opts.fp_opts_selector == "--substruct": |
9 | 44 --substruct |
13 | 45 #end if |
46 #end if | |
47 --errors report 2>&1 | |
9 | 48 </command> |
49 <inputs> | |
50 <param name="infile" type='data' format="sdf,smi,mol,mol2,cml,inchi" label="molecule file"/> | |
51 <conditional name="fp_opts"> | |
52 <param name="fp_opts_selector" type="select" label="Type of fingerprint"> | |
53 <option value='--FP2' selected="True">Open Babel FP2 fingerprints</option> | |
54 <option value='--FP3'>Open Babel FP3 fingerprints</option> | |
55 <option value='--FP4'>Open Babel FP4 fingerprints</option> | |
56 <option value='--MACCS'>Open Babel MACCS fingerprints</option> | |
57 <option value='--RDK'>RDKit topological fingerprint</option> | |
58 <option value='--torsions'>RDKit topological Torsion fingerprints</option> | |
59 <option value='--morgan'>RDKit Morgan fingerprints</option> | |
60 <option value='--pairs'>RDKit Atom Pair fingerprints</option> | |
61 <option value='--maccs166'>RDKit MACCS fingerprints</option> | |
62 <option value='--substruct'>RDKit substructure fingerprints</option> | |
63 </param> | |
64 <when value="--FP2" /> | |
65 <when value="--FP3" /> | |
66 <when value="--FP4" /> | |
67 <when value="--MACCS" /> | |
68 <when value="--RDK"> | |
69 <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help=""> | |
70 <validator type="in_range" min="1" /> | |
71 </param> | |
72 <param name="minPath" type="integer" value="1" label="minimum number of bonds to include in the subgraph" help=""> | |
73 <validator type="in_range" min="1" /> | |
74 </param> | |
75 <param name="maxPath" type="integer" value="7" label="maximum number of bonds to include in the subgraph" help=""> | |
76 <validator type="in_range" min="1" /> | |
77 </param> | |
78 <param name="nBitsPerHash" type="integer" value="4" label="number of bits to set per path" help=""> | |
79 <validator type="in_range" min="1" /> | |
80 </param> | |
81 <param name="useHs" type="boolean" label="include information about the number of hydrogens on each atom" truevalue="--useHs 1" falsevalue="--useHs 0" checked="true" /> | |
82 </when> | |
83 <when value="--torsions"> | |
84 <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help=""> | |
85 <validator type="in_range" min="1" /> | |
86 </param> | |
87 <param name="targetSize" type="integer" value="4" label="number of target bits in the fingerprint" help=""> | |
88 <validator type="in_range" min="1" /> | |
89 </param> | |
90 </when> | |
91 <when value="--morgan"> | |
92 <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help=""> | |
93 <validator type="in_range" min="1" /> | |
94 </param> | |
95 <param name="radius" type="integer" value="2" label="radius for the Morgan algorithm" help=""> | |
96 <validator type="in_range" min="1" /> | |
97 </param> | |
98 <param name="useFeatures" type="boolean" label="use chemical-feature invariants" truevalue="--useFeatures 1" falsevalue="--useFeatures 0" checked="false" /> | |
99 <param name="useChirality" type="boolean" label="include chirality information" truevalue="--useChirality 1" falsevalue="--useChirality 0" checked="false" /> | |
100 <param name="useBondTypes" type="boolean" label="include bond type information" truevalue="--useBondTypes 1" falsevalue="--useBondTypes 0" checked="true" /> | |
101 </when> | |
102 <when value="--pairs"> | |
103 <param name="fpSize" type="integer" value="2048" label="number of bits in the fingerprint" help=""> | |
104 <validator type="in_range" min="1" /> | |
105 </param> | |
106 <param name="minLength" type="integer" value="1" label="minimum bond count for a pair" help=""> | |
107 <validator type="in_range" min="1" /> | |
108 </param> | |
109 <param name="maxLength" type="integer" value="30" label="maximum bond count for a pair" help=""> | |
110 <validator type="in_range" min="1" /> | |
111 </param> | |
112 </when> | |
113 <when value="--maccs166" /> | |
114 <when value="--substruct" /> | |
115 </conditional> | |
116 | |
117 </inputs> | |
118 <outputs> | |
119 <data name="outfile" format="fps" /> | |
120 </outputs> | |
121 <tests> | |
122 <!-- FP2 --> | |
123 <test> | |
124 <param name="infile" value="CID_2244.sdf" ftype="sdf" /> | |
13 | 125 <param name="fp_opts.fp_opts_selector" value="--FP2" /> |
9 | 126 <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" /> |
127 </test> | |
128 <test> | |
129 <param name="infile" value="CID_2244.smi" ftype="smi" /> | |
13 | 130 <param name="fp_opts.fp_opts_selector" value="--FP2" /> |
9 | 131 <output name="outfile" file="CID_2244_FP2.fps" ftype="fps" /> |
132 </test> | |
133 <!-- FP3 --> | |
134 <test> | |
135 <param name="infile" value="CID_2244.sdf" ftype="sdf" /> | |
13 | 136 <param name="fp_opts.fp_opts_selector" value="--FP3" /> |
9 | 137 <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" /> |
138 </test> | |
139 <test> | |
140 <param name="infile" value="CID_2244.smi" ftype="smi" /> | |
13 | 141 <param name="fp_opts.fp_opts_selector" value="--FP3" /> |
9 | 142 <output name="outfile" file="CID_2244_FP3.fps" ftype="fps" /> |
143 </test> | |
144 <!-- FP4 --> | |
145 <test> | |
146 <param name="infile" value="CID_2244.sdf" ftype="sdf" /> | |
13 | 147 <param name="fp_opts.fp_opts_selector" value="--FP4" /> |
9 | 148 <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" /> |
149 </test> | |
150 <test> | |
151 <param name="infile" value="CID_2244.smi" ftype="smi" /> | |
13 | 152 <param name="fp_opts.fp_opts_selector" value="--FP4" /> |
9 | 153 <output name="outfile" file="CID_2244_FP4.fps" ftype="fps" /> |
154 </test> | |
155 <!-- MACCS --> | |
156 <test> | |
157 <param name="infile" value="CID_2244.sdf" ftype="sdf" /> | |
13 | 158 <param name="fp_opts.fp_opts_selector" value="--MACCS" /> |
9 | 159 <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" /> |
160 </test> | |
161 <test> | |
162 <param name="infile" value="CID_2244.smi" ftype="smi" /> | |
13 | 163 <param name="fp_opts.fp_opts_selector" value="--MACCS" /> |
9 | 164 <output name="outfile" file="CID_2244_MACCS.fps" ftype="fps" /> |
165 </test> | |
166 </tests> | |
167 <help> | |
168 | |
23
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
169 .. class:: infomark |
9 | 170 |
23
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
171 **What this tool does** |
9 | 172 |
23
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
173 This tool uses chemfp_ to calculate 10 different fingerprint types for molecules supplied in common chemical file formats. Chemfp uses `Open Babel`_, OpenEye_ and RDKit_. |
22
6c496b524b41
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
20
diff
changeset
|
174 |
23
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
175 For more information check the websites listed below: |
22
6c496b524b41
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
20
diff
changeset
|
176 |
23
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
177 - http://code.google.com/p/rdkit/wiki/FingerprintsInTheRDKit |
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
178 - http://openbabel.org/wiki/Tutorial:Fingerprints |
9 | 179 |
180 ----- | |
181 | |
23
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
182 .. class:: infomark |
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
183 |
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
184 **Input** |
9 | 185 |
23
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
186 Molecule file in one of the supported formats (SDF, SMILES, MOL, MOL2, CML or InChI) |
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
187 |
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
188 * Example:: |
9 | 189 |
190 - SDF File | |
191 | |
192 28434379 | |
193 -OEChem-02031205132D | |
194 | |
195 37 39 0 0 0 0 0 0 0999 V2000 | |
196 8.1648 -1.8842 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 | |
197 6.0812 -0.2134 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 | |
198 6.0812 -1.8229 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 | |
199 2.5369 -2.0182 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 | |
200 6.3919 0.7371 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 | |
201 7.3704 0.9433 0.0000 C 0 0 0 0 | |
202 ...... | |
203 1 15 1 0 0 0 0 | |
204 1 35 1 0 0 0 0 | |
205 2 5 1 0 0 0 0 | |
206 2 11 1 0 0 0 0 | |
207 2 12 1 0 0 0 0 | |
208 3 12 2 0 0 0 0 | |
209 3 13 1 0 0 0 0 | |
210 4 18 1 0 0 0 0 | |
211 ...... | |
212 | |
213 > <PUBCHEM_COMPOUND_CID> | |
214 28434379 | |
215 | |
216 > <PUBCHEM_COMPOUND_CANONICALIZED> | |
217 1 | |
218 | |
219 > <PUBCHEM_CACTVS_COMPLEXITY> | |
220 280 | |
221 | |
222 > <PUBCHEM_CACTVS_HBOND_ACCEPTOR> | |
223 2 | |
224 | |
225 > <PUBCHEM_CACTVS_HBOND_DONOR> | |
226 2 | |
227 | |
228 > <PUBCHEM_CACTVS_ROTATABLE_BOND> | |
229 2 | |
230 | |
231 > <PUBCHEM_CACTVS_SUBSKEYS> | |
232 AAADceBzIAAAAAAAAAAAAAAAAAAAAWAAAAAwYAAAAAAAAFgB8AAAHgAQCAAACCjhlwYx0LdMEgCgASZiZASCgC0hEqAJ2CA4dJiKeKLA2dGUJAhokALYyCcQAAAAAACAAAQAACAAAQAACAAAQAAAAAAAAA== | |
233 | |
234 > | |
235 | |
236 - type : FP2 | |
237 | |
23
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
238 ----- |
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
239 |
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
240 .. class:: infomark |
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
241 |
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
242 **Output** |
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
243 |
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
244 * Example:: |
9 | 245 |
246 #FPS1 | |
247 #num_bits=1021 | |
248 #type=OpenBabel-FP2/1 | |
249 #software=OpenBabel/2.3.0 | |
250 #source=/tmp/dataset_409.dat.sdf | |
251 #date=2012-02-03T11:13:39 | |
252 c0000000000008c0000846000400000000000010800000000000004000000000100010000700802170000018000000c | |
253 0010000000020600208008000008000000c000c02c00002000000c00000100000008001400c800001c0180000000300 | |
254 10000000000080000000c0000060000c0000060810000010000000800102000000 28434379 | |
255 | |
23
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
256 ----- |
9 | 257 |
23
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
258 .. class:: infomark |
22
6c496b524b41
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
20
diff
changeset
|
259 |
23
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
260 **Cite** |
22
6c496b524b41
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
20
diff
changeset
|
261 |
23
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
262 | `Open Babel`_ |
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
263 | RDKit_ project |
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
264 | chemfp_ project. |
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
265 | |
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
266 | N M O'Boyle, M Banck, C A James, C Morley, T Vandermeersch and G R Hutchison. `Open Babel: An open chemical toolbox.`_ |
22
6c496b524b41
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
20
diff
changeset
|
267 |
23
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
268 .. _`Open Babel: An open chemical toolbox.`: http://www.jcheminf.com/content/3/1/33 |
1868005213a1
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
22
diff
changeset
|
269 .. _OpenEye: http://www.eyesopen.com/ |
22
6c496b524b41
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
20
diff
changeset
|
270 .. _chemfp: http://chemfp.com/ |
6c496b524b41
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
20
diff
changeset
|
271 .. _RDKit: http://www.rdkit.org/ |
6c496b524b41
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
20
diff
changeset
|
272 .. _`Open Babel`: http://openbabel.org/ |
6c496b524b41
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
20
diff
changeset
|
273 |
6c496b524b41
ChemicalToolBoX update.
Bjoern Gruening <bjoern.gruening@gmail.com>
parents:
20
diff
changeset
|
274 |
9 | 275 </help> |
276 </tool> |