annotate simsearch.xml @ 10:0d31a1af7075

ChemicalToolBoX update.
author Bjoern Gruening <bjoern.gruening@gmail.com>
date Mon, 27 May 2013 16:53:06 +0200
parents 980199050694
children 526f6e88fbe7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
1 <tool id="chemfp_simsearch" name="Similarity Search" version="0.1">
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
2 <description>of fingerprint data sets</description>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
3 <!--<parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="10000" shared_inputs="targets,fastsearch" merge_outputs="outfile"></parallelism>-->
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
4 <requirements>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
5 <requirement type="package" version="1.1p1">chemfp</requirement>
7
980199050694 Uploaded
bgruening
parents: 3
diff changeset
6 <requirement type="package" version="2.3.2">openbabel</requirement>
0
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
7 </requirements>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
8 <command>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
9 #if $method_opts.method_opts_selector == "chemfp":
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
10 ## TODO: remove all comment lines; in multi mode many comment lines will appear, even in the middle of the result file.
3
512145f2e417 Uploaded
bgruening
parents: 0
diff changeset
11 simsearch -k $method_opts.k --threshold $method_opts.threshold --query-format fps --target-format fps -q "${method_opts.query}" "${method_opts.targets}" -o "${outfile}"
0
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
12 #else:
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
13 ## OpenBabel needs the original molecule file (molecule.'fileformat') next to the fastsearch index (molecule.fs). We use a composite datatype to accomplish that.
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
14 ## Furthermore, OpenBabel is really picky about file extensions. We need to specify every datatype. I did not find a way to specify the query file type.
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
15 ## A workaround is to create a symlink with a proper file-extension.
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
16 #import tempfile
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
17 #set $temp_file = tempfile.NamedTemporaryFile()
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
18 #set $temp_link = "%s.%s" % ($temp_file.name, $method_opts.query.ext)
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
19 $temp_file.close()
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
20 ln -s $method_opts.query $temp_link;
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
21 obabel -i fs "${os.path.join($method_opts.fastsearch.extra_files_path,'molecule.fs')}" -S "${temp_link}" -at${method_opts.threshold} -O "${outfile}" -osmi -aa 2>&#38;1;
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
22 rm $temp_link
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
23 #end if
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
24 </command>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
25 <inputs>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
26
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
27 <conditional name="method_opts">
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
28 <param name="method_opts_selector" type="select" label="Subject database/sequences">
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
29 <option value="chemfp">Chemfp fingerprint file</option>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
30 <option value="obabel">OpenBabel Fastsearch Index</option>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
31 </param>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
32 <when value="chemfp">
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
33 <param name="query" type='data' format="fps" label="query"/>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
34 <param name="targets" type='data' format="fps" label="Target database"/>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
35 <param name="k" type='text' label="k nearest neighbor" value='all'/>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
36 <param name="threshold" type='float' label="threshold" value='0.7'/>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
37 </when>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
38 <when value="obabel">
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
39 <param name="query" type='data' format="smi,mol,sdf,inchi" label="query"/>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
40 <param name="fastsearch" type='data' format="obfs" label="OpenBabel Fastsearch Index"/>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
41 <param name="threshold" type='float' label="threshold" value='0.7'/>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
42 </when>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
43 </conditional>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
44
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
45 </inputs>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
46 <outputs>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
47 <data name="outfile" format="tabular" />
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
48 </outputs>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
49 <tests>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
50 <test>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
51 <param name="targets" ftype="fps" value="targets.fps"/>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
52 <param name="query" ftype="fps" value="q.fps"/>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
53 <param name="k" value='4'/>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
54 <param name="threshold" value='0.7'/>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
55 <output name="outfile" ftype="tabular" file="simsearch_on_tragets_and_q.tabular"/>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
56 </test>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
57 </tests>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
58 <help>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
59
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
60
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
61 **What it does**
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
62
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
63 similarity search of fingerprint data sets
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
64
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
65 -----
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
66
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
67 **Example**
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
68
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
69 * input::
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
70
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
71 - Target Database in FPS format
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
72
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
73 #FPS1
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
74 #num_bits=881
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
75 #type=CACTVS-E_SCREEN/1.0 extended=2
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
76 #software=CACTVS/unknown
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
77 #source=Desktop/3579363516810334491.sdf
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
78 #date=2012-02-03T13:07:47
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
79 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
80 cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2926101609401b13e408000000000001
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
81 00200000040080000010000002000000000000 55169009
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
82 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
83 cc0bed3248000580644626004101b4844805901b041c2e19111e45039b8b2926105609401313e408000000000001
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
84 00200000040080000010000002000000000000 55079807
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
85 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01087e960
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
86 cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e408000000000001
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
87 00200000040080000010000002000000000000 3153534
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
88 07ce0c000000000000000000000000000080060000000c000000000000001a800f0000780008100000103c87e960
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
89 cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2926101609401b13e408000000000001
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
90 00200000040080000010000002000000000000 55168823
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
91 07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
92 cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e408000000000001
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
93 00200000040080000010000002000000000000 55102353
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
94 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01487e9
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
95 60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800003000
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
96 000100200000040080000010000002000000000000 55091849
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
97 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000103c87e9
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
98 60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
99 000100200000040080000010000002000000000000 55091752
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
100 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01487e9
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
101 60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800001000
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
102 000100200000040080000010000002000000000000 55091467
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
103 07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e9
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
104 60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
105 000100200000040080000010000002000000000000 55091466
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
106 07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e9
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
107 60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
108 000100200000040080000010000002000000000000 55091416
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
109 03ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e9
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
110 60cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
111 000100200000040080000010000002000000000000 6499094
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
112 03ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e9
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
113 60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
114 000100200000040080000010000002000000000000 6485578
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
115 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01087e9
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
116 60cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e40800001000
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
117 000100200000040080000010000002000000000000 6485577
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
118
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
119 - query :
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
120 #FPS1
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
121 #num_bits=881
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
122 #type=CACTVS-E_SCREEN/1.0 extended=2
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
123 #software=CACTVS/unknown
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
124 #source=CID_28434379.sdf
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
125 #date=2012-02-03T13:08:39
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
126 07ce04000000000000000000000000000080060000000c060000000000001a800f0000780008100000101487e9
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
127 608c0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
128 000100200000040080000010000002000000000000 28434379
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
129 - k : 4
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
130 - threshold : 0.7
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
131
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
132 * output::
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
133
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
134 #Simsearch/1
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
135 #num_bits=881
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
136 #type=Tanimoto k=4 threshold=0.7
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
137 #software=chemfp/1.0
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
138 #queries=q.fps
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
139 #targets=target.fps
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
140 #query_sources=CID_28434379.sdf
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
141 #target_sources=Desktop/3579363516810334491.sdf
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
142 4 28434379 55091752 0.9684 55091466 0.9682 55091416 0.9682 55102353 0.9682
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
143
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
144
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
145 </help>
b4cbee77896d Uploaded
bgruening
parents:
diff changeset
146 </tool>