Mercurial > repos > bgruening > simsearch
changeset 0:b4cbee77896d
Uploaded
author | bgruening |
---|---|
date | Tue, 26 Mar 2013 14:57:22 -0400 |
parents | |
children | e02f5af0e6de |
files | repository_dependencies.xml simsearch.xml test-data/q.fps test-data/simsearch_on_tragets_and_q.tabular test-data/target.fps |
diffstat | 5 files changed, 185 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Tue Mar 26 14:57:22 2013 -0400 @@ -0,0 +1,5 @@ +<?xml version="1.0"?> +<repositories description="This requires the Molecule datatype definitions (e.g. SMILES, InChI, SD-format)."> + <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="chemfp" owner="bgruening" changeset_revision="45f968b8bff6" /> + <repository toolshed="http://testtoolshed.g2.bx.psu.edu/" name="openbabel" owner="bgruening" changeset_revision="6493d130f018" /> +</repositories>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/simsearch.xml Tue Mar 26 14:57:22 2013 -0400 @@ -0,0 +1,145 @@ +<tool id="chemfp_simsearch" name="Similarity Search" version="0.1"> + <description>of fingerprint data sets</description> + <!--<parallelism method="multi" split_inputs="query" split_mode="to_size" split_size="10000" shared_inputs="targets,fastsearch" merge_outputs="outfile"></parallelism>--> + <requirements> + <requirement type="package" version="1.1p1">chemfp</requirement> + </requirements> + <command> + #if $method_opts.method_opts_selector == "chemfp": + ## TODO: remove all comment lines; in multi mode many comment lines will appear, also in the middle of the result file, remove them + simsearch -k $method_opts.k --threshold $method_opts.threshold --in fps --target-format fps -q "${method_opts.query}" "${method_opts.targets}" -o "${outfile}" + #else: + ## OpenBabel needs the original molecule file (molecule.'fileformat') next to the fastsearch index (molecule.fs). We use a composite datatype to accomplish that. + ## Furthermore OpenBabel is really picky with file extensions. We need to specify every datatype. I did not find a solution to specify the query-filetype. + ## A workaround is to create a symlink with a proper file-extension. 
+ #import tempfile + #set $temp_file = tempfile.NamedTemporaryFile() + #set $temp_link = "%s.%s" % ($temp_file.name, $method_opts.query.ext) + $temp_file.close() + ln -s $method_opts.query $temp_link; + obabel -i fs "${os.path.join($method_opts.fastsearch.extra_files_path,'molecule.fs')}" -S "${temp_link}" -at${method_opts.threshold} -O "${outfile}" -osmi -aa 2>&1; + rm $temp_link + #end if + </command> + <inputs> + + <conditional name="method_opts"> + <param name="method_opts_selector" type="select" label="Subject database/sequences"> + <option value="chemfp">Chemfp fingerprint file</option> + <option value="obabel">OpenBabel Fastsearch Index</option> + </param> + <when value="chemfp"> + <param name="query" type='data' format="fps" label="query"/> + <param name="targets" type='data' format="fps" label="Target database"/> + <param name="k" type='text' label="k nearest neighbor" value='all'/> + <param name="threshold" type='float' label="threshold" value='0.7'/> + </when> + <when value="obabel"> + <param name="query" type='data' format="smi,mol,sdf,inchi" label="query"/> + <param name="fastsearch" type='data' format="obfs" label="OpenBabel Fastsearch Index"/> + <param name="threshold" type='float' label="threshold" value='0.7'/> + </when> + </conditional> + + </inputs> + <outputs> + <data name="outfile" format="tabular" /> + </outputs> + <tests> + <test> + <param name="targets" ftype="fps" value="targets.fps"/> + <param name="query" ftype="fps" value="q.fps"/> + <param name="k" value='4'/> + <param name="th" value='0.7'/> + <output name="outfile" ftype="tabular" file="simsearch_on_tragets_and_q.tabular"/> + </test> + </tests> + <help> + + +**What it does** + +similarity search of fingerprint data sets + +----- + +**Example** + +* input:: + + - Target Database in FPS format + + #FPS1 + #num_bits=881 + #type=CACTVS-E_SCREEN/1.0 extended=2 + #software=CACTVS/unknown + #source=Desktop/3579363516810334491.sdf + #date=2012-02-03T13:07:47 + 
07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960 + cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2926101609401b13e408000000000001 + 00200000040080000010000002000000000000 55169009 + 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960 + cc0bed3248000580644626004101b4844805901b041c2e19111e45039b8b2926105609401313e408000000000001 + 00200000040080000010000002000000000000 55079807 + 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01087e960 + cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e408000000000001 + 00200000040080000010000002000000000000 3153534 + 07ce0c000000000000000000000000000080060000000c000000000000001a800f0000780008100000103c87e960 + cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2926101609401b13e408000000000001 + 00200000040080000010000002000000000000 55168823 + 07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960 + cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e408000000000001 + 00200000040080000010000002000000000000 55102353 + 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01487e9 + 60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800003000 + 000100200000040080000010000002000000000000 55091849 + 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000103c87e9 + 60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000 + 000100200000040080000010000002000000000000 55091752 + 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01487e9 + 60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800001000 + 000100200000040080000010000002000000000000 55091467 + 
07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e9 + 60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000 + 000100200000040080000010000002000000000000 55091466 + 07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e9 + 60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000 + 000100200000040080000010000002000000000000 55091416 + 03ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e9 + 60cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000 + 000100200000040080000010000002000000000000 6499094 + 03ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e9 + 60cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000 + 000100200000040080000010000002000000000000 6485578 + 07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01087e9 + 60cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e40800001000 + 000100200000040080000010000002000000000000 6485577 + + - query : + #FPS1 + #num_bits=881 + #type=CACTVS-E_SCREEN/1.0 extended=2 + #software=CACTVS/unknown + #source=CID_28434379.sdf + #date=2012-02-03T13:08:39 + 07ce04000000000000000000000000000080060000000c060000000000001a800f0000780008100000101487e9 + 608c0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000 + 000100200000040080000010000002000000000000 28434379 + - k : 4 + - threshold : 0.7 + +* output:: + + #Simsearch/1 + #num_bits=881 + #type=Tanimoto k=4 threshold=0.7 + #software=chemfp/1.0 + #queries=q.fps + #targets=target.fps + #query_sources=CID_28434379.sdf + #target_sources=Desktop/3579363516810334491.sdf + 4 28434379 55091752 0.9684 55091466 0.9682 55091416 0.9682 55102353 0.9682 + + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/q.fps Tue Mar 26 14:57:22 2013 -0400 @@ -0,0 +1,7 @@ +#FPS1 +#num_bits=881 +#type=CACTVS-E_SCREEN/1.0 extended=2 +#software=CACTVS/unknown +#source=CID_28434379.sdf +#date=2012-02-03T13:08:39 +07ce04000000000000000000000000000080060000000c060000000000001a800f0000780008100000101487e9608c0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000 28434379
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/simsearch_on_tragets_and_q.tabular Tue Mar 26 14:57:22 2013 -0400 @@ -0,0 +1,9 @@ +#Simsearch/1 +#num_bits=881 +#type=Tanimoto k=4 threshold=0.7 +#software=chemfp/1.0 +#queries=q.fps +#targets=target.fps +#query_sources=CID_28434379.sdf +#target_sources=Desktop/3579363516810334491.sdf +4 28434379 55091752 0.9684 55091466 0.9682 55091416 0.9682 55102353 0.9682
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/target.fps Tue Mar 26 14:57:22 2013 -0400 @@ -0,0 +1,19 @@ +#FPS1 +#num_bits=881 +#type=CACTVS-E_SCREEN/1.0 extended=2 +#software=CACTVS/unknown +#source=Desktop/3579363516810334491.sdf +#date=2012-02-03T13:07:47 +07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701487e960cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000 55169009 +07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000701087e960cc0bed3248000580644626004101b4844805901b041c2e19111e45039b8b2926105609401313e40800000000000100200000040080000010000002000000000000 55079807 +07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01087e960cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000 3153534 +07ce0c000000000000000000000000000080060000000c000000000000001a800f0000780008100000103c87e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2926101609401b13e40800000000000100200000040080000010000002000000000000 55168823 +07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000 55102353 +07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01487e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800003000000100200000040080000010000002000000000000 55091849 +07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000103c87e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000 55091752 
+07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01487e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800001000000100200000040080000010000002000000000000 55091467 +07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000 55091466 +07ce05000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000 55091416 +03ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000 6499094 +03ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000101087e960cc0bed3248000580644626204101b4844805901b041c2e19511e45039b8b2924101609401b13e40800000000000100200000040080000010000002000000000000 6485578 +07ce04000000000000000000000000000080060000000c000000000000001a800f0000780008100000b01087e960cc0bed3248000580644626004101b4844805901b041c2e19511e45039b8b2924101609401b13e40800001000000100200000040080000010000002000000000000 6485577