changeset 0:b4cbee77896d

Uploaded
author bgruening
date Tue, 26 Mar 2013 14:57:22 -0400
parents
children e02f5af0e6de
files repository_dependencies.xml simsearch.xml test-data/q.fps test-data/simsearch_on_tragets_and_q.tabular test-data/target.fps
diffstat 5 files changed, 185 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml	Tue Mar 26 14:57:22 2013 -0400
@@ -0,0 +1,5 @@
+<?xml version="1.0"?>
+<repositories description="This requires the Molecule datatype definitions (e.g. SMILES, InChI, SD-format).">
+    <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="chemfp" owner="bgruening" changeset_revision="45f968b8bff6" />
+    <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="openbabel" owner="bgruening" changeset_revision="6493d130f018" />
+</repositories> 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/simsearch.xml	Tue Mar 26 14:57:22 2013 -0400
@@ -0,0 +1,145 @@
+<tool id="chemfp_simsearch" name="Similarity Search" version="0.1">
+    <description>of fingerprint data sets</description>
+    <!--<parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="10000" shared_inputs="targets,fastsearch" merge_outputs="outfile"></parallelism>-->
+    <requirements>
+        <requirement type="package" version="1.1p1">chemfp</requirement>
+    </requirements>
+    <command>
+        #if $method_opts.method_opts_selector == "chemfp":
+            ## TODO: remove all comment lines, in muti mode many comment lines will be appear, also in the middle of the result file, remove them
+            simsearch -k $method_opts.k --threshold $method_opts.threshold --in fps --target-format fps -q "${method_opts.query}" "${method_opts.targets}" -o "${outfile}"
+        #else:
+            ## OpenBabel needs the original molecule file (molecule.'fileformat') next to the fastsearch index (molecule.fs). We use a composite datatype to accomplish that.
+            ## Furthermore OpenBabel is really picky with fileextensions. We need to specify every datatype. I did not find a solution to specify the query-filetype.
+            ## A workaround is to create a symlink with a proper file-extension.
+            #import tempfile
+            #set $temp_file = tempfile.NamedTemporaryFile()
+            #set $temp_link = "%s.%s" % ($temp_file.name, $method_opts.query.ext)
+            $temp_file.close()
+            ln -s $method_opts.query $temp_link;
+            obabel -i fs "${os.path.join($method_opts.fastsearch.extra_files_path,'molecule.fs')}" -S "${temp_link}" -at${method_opts.threshold} -O "${outfile}" -osmi -aa 2>&#38;1;
+            rm $temp_link
+        #end if
+    </command>
+    <inputs>
+
+        <conditional name="method_opts">
+            <param name="method_opts_selector" type="select" label="Subject database/sequences">
+              <option value="chemfp">Chemfp fingerprint file</option>
+              <option value="obabel">OpenBabel Fastsearch Index</option>
+            </param>
+            <when value="chemfp">
+                <param name="query" type='data' format="fps" label="query"/>
+                <param name="targets" type='data' format="fps" label="Target database"/>
+                <param name="k" type='text'  label="k nearest neighbor" value='all'/>
+                <param name="threshold" type='float' label="threshold" value='0.7'/>
+            </when>
+            <when value="obabel">
+                <param name="query" type='data' format="smi,mol,sdf,inchi" label="query"/>
+                <param name="fastsearch" type='data' format="obfs" label="OpenBabel Fastsearch Index"/>
+                <param name="threshold" type='float' label="threshold" value='0.7'/>
+            </when>
+        </conditional>
+
+    </inputs>
+    <outputs>
+        <data name="outfile" format="tabular" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="targets" ftype="fps" value="targets.fps"/>
+            <param name="query" ftype="fps" value="q.fps"/>
+            <param name="k" value='4'/>
+            <param name="th" value='0.7'/>
+            <output name="outfile" ftype="tabular" file="simsearch_on_tragets_and_q.tabular"/>
+        </test>
+    </tests>
+    <help>
+
+
+**What it does**
+
+similarity search of fingerprint data sets
+
+-----
+
+**Example**
+
+* input::
+	
+	      - Target Database in FPS format
+
+			#FPS1
+			#num_bits=881
+			#type=CACTVS-E_SCREEN/1.0 extended=2
+			#software=CACTVS/unknown
+			#source=Desktop/3579363516810334491.sdf
+			#date=2012-02-03T13:07:47
+			07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960
+			cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2926101609401b13e408000000000001
+			00200000040080000010000002000000000000	55169009
+			07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960
+			cc0bed3248000580644626004101b4844805901b041c2e19111e45039b8b2926105609401313e408000000000001
+			00200000040080000010000002000000000000	55079807
+			07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01087e960
+			cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e408000000000001
+			00200000040080000010000002000000000000	3153534
+			07ce0c000000000000000000000000000080060000000c000000000000001a800f0000780008100000103c87e960
+			cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2926101609401b13e408000000000001
+			00200000040080000010000002000000000000	55168823
+			07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960
+			cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e408000000000001
+			00200000040080000010000002000000000000	55102353
+			07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01487e9
+			60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800003000
+			000100200000040080000010000002000000000000	55091849
+			07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000103c87e9
+			60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000
+			000100200000040080000010000002000000000000	55091752
+			07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01487e9
+			60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800001000
+			000100200000040080000010000002000000000000	55091467
+			07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e9
+			60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000
+			000100200000040080000010000002000000000000	55091466
+			07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e9
+			60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000
+			000100200000040080000010000002000000000000	55091416
+			03ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e9
+			60cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000
+			000100200000040080000010000002000000000000	6499094
+			03ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e9
+			60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000
+			000100200000040080000010000002000000000000	6485578
+			07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01087e9
+			60cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e40800001000
+			000100200000040080000010000002000000000000	6485577
+
+		- query : 
+			#FPS1
+			#num_bits=881
+			#type=CACTVS-E_SCREEN/1.0 extended=2
+			#software=CACTVS/unknown
+			#source=CID_28434379.sdf
+			#date=2012-02-03T13:08:39
+			07ce04000000000000000000000000000080060000000c060000000000001a800f0000780008100000101487e9
+			608c0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000
+			000100200000040080000010000002000000000000	28434379
+		- k : 4
+		- threshold : 0.7
+
+* output::
+
+	#Simsearch/1
+	#num_bits=881
+	#type=Tanimoto k=4 threshold=0.7
+	#software=chemfp/1.0
+	#queries=q.fps
+	#targets=target.fps
+	#query_sources=CID_28434379.sdf
+	#target_sources=Desktop/3579363516810334491.sdf
+	4	28434379	55091752	0.9684	55091466	0.9682	55091416	0.9682	55102353	0.9682
+
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/q.fps	Tue Mar 26 14:57:22 2013 -0400
@@ -0,0 +1,7 @@
+#FPS1
+#num_bits=881
+#type=CACTVS-E_SCREEN/1.0 extended=2
+#software=CACTVS/unknown
+#source=CID_28434379.sdf
+#date=2012-02-03T13:08:39
+07ce04000000000000000000000000000080060000000c060000000000001a800f0000780008100000101487e9608c0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000	28434379
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/simsearch_on_tragets_and_q.tabular	Tue Mar 26 14:57:22 2013 -0400
@@ -0,0 +1,9 @@
+#Simsearch/1
+#num_bits=881
+#type=Tanimoto k=4 threshold=0.7
+#software=chemfp/1.0
+#queries=q.fps
+#targets=target.fps
+#query_sources=CID_28434379.sdf
+#target_sources=Desktop/3579363516810334491.sdf
+4	28434379	55091752	0.9684	55091466	0.9682	55091416	0.9682	55102353	0.9682
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/target.fps	Tue Mar 26 14:57:22 2013 -0400
@@ -0,0 +1,19 @@
+#FPS1
+#num_bits=881
+#type=CACTVS-E_SCREEN/1.0 extended=2
+#software=CACTVS/unknown
+#source=Desktop/3579363516810334491.sdf
+#date=2012-02-03T13:07:47
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000	55169009
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000	55079807
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01087e960cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000	3153534
+07ce0c000000000000000000000000000080060000000c000000000000001a800f0000780008100000103c87e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000	55168823
+07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000	55102353
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01487e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800003000000100200000040080000010000002000000000000	55091849
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000103c87e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000	55091752
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01487e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800001000000100200000040080000010000002000000000000	55091467
+07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000	55091466
+07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000	55091416
+03ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000	6499094
+03ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000	6485578
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01087e960cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e40800001000000100200000040080000010000002000000000000	6485577