Mercurial > repos > bgruening > graphclust_cmfinder
changeset 0:407180a7f27e draft default tip
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CMFinder commit f447414150c19865e904d3914a68e2479fadddce
author | bgruening |
---|---|
date | Thu, 15 Dec 2016 18:19:27 -0500 |
parents | |
children | |
files | cmFinder.py cmFinder.xml test-data/cmfinder.fa test-data/in.model.tree.stk test-data/model.cmfinder.stk |
diffstat | 5 files changed, 267 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
"""Refine a GraphClust model alignment with CMFinder (Galaxy wrapper script).

Usage::

    python cmFinder.py MODEL_TREE_STK CMFINDER_FA TOOL_PREFIX [GAP_FLAG GAP_VALUE]

MODEL_TREE_STK  input alignment that seeds the search
CMFINDER_FA     FASTA file handed to ``cmfinder``
TOOL_PREFIX     string prepended verbatim to ``alifold.pl`` and
                ``mloc2stockholm.pl``; pass ``''`` to resolve them via ``PATH``
                (a directory prefix must therefore end with ``/``)
GAP_FLAG/VALUE  optional option/value pair forwarded to ``cmfinder``

All intermediate and result files are written to the current working
directory. The result is left in ``model.cmfinder.stk``.
"""
import os
import subprocess
import sys
from shutil import copyfile

USAGE = ("usage: cmFinder.py MODEL_TREE_STK CMFINDER_FA TOOL_PREFIX "
         "[GAP_FLAG GAP_VALUE]")

# Fixed file names in the working directory.
RESULT_STK = "model.cmfinder.stk"  # final result, also used as scratch input
SEED_STK = "model.tree.stk"        # Stockholm alignment given to cmfinder -a
CM_FILE = "output"                 # file cmfinder is asked to write


def run_tool(command, stdout=None):
    """Run *command* (an argument list) without a shell; return its status.

    Arguments are passed to the program as-is, so file names containing
    spaces or quotes cannot break or inject into the command line.

    A non-zero status, or an executable that cannot be started (reported as
    status 127, the shell's "command not found" value), is written to stderr
    but never raised: the pipeline stays best-effort and the caller decides
    what a failure means.
    """
    try:
        status = subprocess.call(command, stdout=stdout)
    except OSError as err:
        sys.stderr.write("cmFinder.py: cannot run %s: %s\n" % (command[0], err))
        return 127
    if status != 0:
        sys.stderr.write("cmFinder.py: %s exited with status %d\n"
                         % (command[0], status))
    return status


def parse_args(argv):
    """Split *argv* into ``(model_tree_stk, cmfinder_fa, path, gap_args)``.

    ``gap_args`` is the list of extra ``cmfinder`` options: empty, or the
    flag followed by its value. Empty strings are dropped so they are not
    passed to ``cmfinder`` as empty arguments.

    Exits with the usage message (status 1) when a positional argument is
    missing or a gap flag is given without its value.
    """
    if len(argv) < 4 or len(argv) == 5:
        sys.exit(USAGE)
    gap_args = [arg for arg in argv[4:6] if arg]
    return argv[1], argv[2], argv[3], gap_args


def cmfinder_command(gap_args, cmfinder_fa):
    """Return the ``cmfinder`` argument list for the given options and FASTA."""
    return ["cmfinder"] + list(gap_args) + ["-a", SEED_STK, cmfinder_fa, CM_FILE]


def main(argv=None):
    """Run the alifold -> mloc2stockholm -> cmfinder pipeline.

    *argv* defaults to ``sys.argv``.
    """
    model_tree_stk, cmfinder_fa, path, gap_args = parse_args(
        sys.argv if argv is None else argv)

    # Work on a copy of the input under a fixed name. Skip the copy when the
    # input already is that file (copyfile would raise on identical paths).
    if os.path.realpath(model_tree_stk) != os.path.realpath(RESULT_STK):
        copyfile(model_tree_stk, RESULT_STK)

    # The next step reads "<input>.alifold", so alifold.pl is presumably what
    # creates it next to the input file.
    run_tool([path + "alifold.pl", "-file", model_tree_stk])
    run_tool([path + "mloc2stockholm.pl", "-file", RESULT_STK,
              "-split_input", "yes",
              "--con_struct", model_tree_stk + ".alifold"])

    # The converted alignment is picked up from "<name>.sth". Without it
    # nothing downstream can work, so stop with a clear message.
    converted = RESULT_STK + ".sth"
    if not os.path.isfile(converted):
        sys.exit("cmFinder.py: mloc2stockholm.pl did not produce %s" % converted)
    os.rename(converted, SEED_STK)

    # cmfinder's standard output replaces the scratch copy as the result.
    with open(RESULT_STK, "w") as result:
        run_tool(cmfinder_command(gap_args, cmfinder_fa), stdout=result)

    if os.path.isfile(CM_FILE):
        # cmfinder produced its output file; only its stdout is kept.
        os.remove(CM_FILE)
    else:
        # cmfinder produced nothing: fall back to the seed alignment.
        copyfile(SEED_STK, RESULT_STK)


if __name__ == "__main__":
    main()
<tool id="cmFinder" name="CMFinder_v0" version="0.1.0">
    <!--
        Galaxy tool definition that runs cmFinder.py from the tool directory.
        The script receives the model alignment, the FASTA file, an empty
        tool-path prefix ('' means the helper scripts are resolved via PATH)
        and, optionally, the gap-threshold flag followed by its value.
    -->
    <requirements>
        <requirement type="package" version="0.1">graphclust-wrappers</requirement>
        <requirement type="package" version="0.2">cmfinder</requirement>
        <requirement type="package" version="2.2.10">viennarna</requirement>
    </requirements>
    <stdio>
        <!-- Any non-zero exit status of the wrapper script fails the job. -->
        <exit_code range="1:" />
    </stdio>
    <command>
<![CDATA[

    python '$__tool_directory__/cmFinder.py' '$model_tree_stk' '$cmfinder_fa' ''
    $gap_threshold_opts.gap_threshold_opts_selector
    #if str($gap_threshold_opts.gap_threshold_opts_selector) == '--g':
        $gap_threshold_opts.gap
    #end if

]]>
    </command>
    <inputs>
        <!--
            NOTE(review): this input is declared as "stockholm", but the bundled
            test input (in.model.tree.stk) starts with a CLUSTAL W header and
            the help text describes a CLUSTAL to STOCKHOLM conversion. Confirm
            which datatype is intended.
        -->
        <param name="model_tree_stk" type="data" format="stockholm" label="model_tree_stk" help="" />
        <!--
            NOTE(review): "text" may not be a registered Galaxy datatype
            extension ("txt" and "fasta" are). Confirm against the target
            Galaxy's datatypes configuration before changing it.
        -->
        <param name="cmfinder_fa" type="data" format="text" label="cmfinder_fa" help="" />
        <conditional name="gap_threshold_opts">
            <param name="gap_threshold_opts_selector" type="select" label="Use gap threshold" help="">
                <option value="--g" selected="true">Yes (--g)</option>
                <option value="">No</option>
            </param>
            <when value="--g">
                <param name="gap" type="float" value="1.0" size="5"
                       label="Define the gap threshold to determine the conserved column (--g)" help="" />
            </when>
            <when value="" />
        </conditional>
    </inputs>
    <outputs>
        <!-- Collected from the file of the same name that cmFinder.py leaves in the job working directory. -->
        <data name="model_cmfinder_stk" format="stockholm" label="model_cmfinder_stk" from_work_dir="model.cmfinder.stk" />
    </outputs>
    <tests>
        <test>
            <param name="model_tree_stk" value="in.model.tree.stk" />
            <param name="cmfinder_fa" value="cmfinder.fa" />
            <param name="gap_threshold_opts.gap_threshold_opts_selector" value="--g" />
            <param name="gap_threshold_opts.gap" value="1.0" />
            <output name="model_cmfinder_stk" file="model.cmfinder.stk" />
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

First, the input alignment is converted from CLUSTAL format to STOCKHOLM format.
Then *CMFinder* is used to determine consensus motifs for the sequences.
]]>
    </help>
    <citations>
        <citation type="bibtex">@inproceedings{costa2010fast,
            title={Fast neighborhood subgraph pairwise distance kernel},
            author={Costa, Fabrizio and De Grave, Kurt},
            booktitle={Proceedings of the 26th International Conference on Machine Learning},
            pages={255--262},
            year={2010},
            organization={Omnipress}
            }
        </citation>
        <citation type="doi">10.1093/bioinformatics/btk008</citation>
        <citation type="doi">10.1186/1748-7188-6-26</citation>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cmfinder.fa Thu Dec 15 18:19:27 2016 -0500 @@ -0,0 +1,40 @@ +>7 SEQ7#1#83#+ ORIGID RF00005_rep.12_AC108081.2/59868-59786_7 ORIGHEAD RF00005_rep.12 +GUCAGGAUGGCCGAGCGGUCUAAGGCGCUGCGUUCAGGUCGCAGUCUCCCCUGGAGGCGUGGGUUCGAAUCCCACUUCUGACA +>9 SEQ9#1#73#+ ORIGID RF00005_rep.14_AL021808.2/65570-65498_9 ORIGHEAD RF00005_rep.14 +GCUUCUGUAGUGUAGUGGUUAUCACGUUCGCCUCACACGCGAAAGGUCCCCGGUUCGAAACCGGGCAGAAGCA +>10 SEQ10#1#73#+ ORIGID RF00005_rep.15_AC008443.10/42590-42518_10 ORIGHEAD RF00005_rep.15 +GCCCGGCUAGCUCAGUCGGUAGAGCAUGAGACUCUUAAUCUCAGGGUCGUGGGUUCGAGCCCCACGUUGGGCG +>15 SEQ15#1#73#+ ORIGID RF00005_rep.1_AC005329.1/7043-6971_15 ORIGHEAD RF00005_rep.1 +GCCGAAAUAGCUCAGUUGGGAGAGCGUUAGACUGAAGAUCUAAAGGUCCCUGGUUCGAUCCCGGGUUUCGGCA +>16 SEQ16#1#72#+ ORIGID RF00005_rep.20_AL671879.2/100356-100285_16 ORIGHEAD RF00005_rep.20 +GGGGAUGUAGCUCAGUGGUAGAGCGCAUGCUUCGCAUGUAUGAGGCCCCGGGUUCGAUCCCCGGCAUCUCCA +>17 SEQ17#1#71#+ ORIGID RF00005_rep.21_AL355149.13/15278-15208_17 ORIGHEAD RF00005_rep.21 +GCAUUGGUGGUUCAGUGGUAGAAUUCUCGCCUCCCACGCGGGAGACCCGGGUUCAAUUCCCGGCCAAUGCA +>18 SEQ18#1#72#+ ORIGID RF00005_rep.22_AL590385.23/26487-26416_18 ORIGHEAD RF00005_rep.22 +GCGUUGGUGGUAUAGUGGUGAGCAUAGCUGCCUUCCAAGCAGUUGACCCGGGUUCGAUUCCCGGCCAACGCA +>23 SEQ23#1#74#+ ORIGID RF00005_rep.27_AL352978.6/119697-119770_23 ORIGHEAD RF00005_rep.27 +GGCCGGUUAGCUCAGUUGGUUAGAGCGUGGUGCUAAUAACGCCAAGGUCGCGGGUUCGAUCCCCGUACGGGCCA +>28 SEQ28#1#71#+ ORIGID RF00005_rep.31_AC092686.3/29631-29561_28 ORIGHEAD RF00005_rep.31 +GCAUUGGUGGUUCAGUGGUAGAAUUCUCGCCUGCCACGCGGGAGGCCCGGGUUCGAUUCCCGGCCAAUGCA +>30 SEQ30#1#72#+ ORIGID RF00005_rep.33_AC018638.5/4694-4623_30 ORIGHEAD RF00005_rep.33 +GGCUCGUUGGUCUAGGGGUAUGAUUCUCGCUUAGGGUGCGAGAGGUCCCGGGUUCAAAUCCCGGACGAGCCC +>31 SEQ31#1#73#+ ORIGID RF00005_rep.34_AC008443.10/43006-42934_31 ORIGHEAD RF00005_rep.34 +GUUUCCGUAGUGUAGUGGUUAUCACGUUCGCCUCACACGCGAAAGGUCCCCGGUUCGAAACCGGGCGGAAACA +>32 SEQ32#1#73#+ ORIGID 
RF00005_rep.35_AC005783.1/27398-27326_32 ORIGHEAD RF00005_rep.35 +GUUUCCGUAGUGUAGCGGUUAUCACAUUCGCCUCACACGCGAAAGGUCCCCGGUUCGAUCCCGGGCGGAAACA +>33 SEQ33#1#72#+ ORIGID RF00005_rep.36_AC007298.17/145366-145295_33 ORIGHEAD RF00005_rep.36 +UCCUCGUUAGUAUAGUGGUGAGUAUCCCCGCCUGUCACGCGGGAGACCGGGGUUCGAUUCCCCGACGGGGAG +>35 SEQ35#1#72#+ ORIGID RF00005_rep.38_J00309.1/356-427_35 ORIGHEAD RF00005_rep.38 +UCCCUGGUGGUCUAGUGGCUAGGAUUCGGCGCUUUCACCGCCGCGCCCCGGGUUCGAUUCCCGGCCAGGAAU +>37 SEQ37#1#82#+ ORIGID RF00005_rep.3_Z54587.1/126-45_37 ORIGHEAD RF00005_rep.3 +GGUAGCGUGGCCGAGCGGUCUAAGGCGCUGGAUUUAGGCUCCAGUCUCUUCGGAGGCGUGGGUUCGAAUCCCACCGCUGCCA +>46 SEQ46#1#72#+ ORIGID RF00005_rep.5_AL590385.23/26129-26058_46 ORIGHEAD RF00005_rep.5 +UCCCUGGUGGUCUAGUGGUUAGGAUUCGGCGCUCUCACCGCCGCGGCCCGGGUUCGAUUCCCGGUCAGGGAA +>51 SEQ51#1#88#+ ORIGID RF00006_rep.0_AF045145.1/1-88_51 ORIGHEAD RF00006_rep.0 +GGCUGGCUUUAGCUCAGCGGUUACUUCGCGUGUCAUCAAACCACCUCUCUGGGUUGUUCGAGACCCGCGGGCGCUCUCCAGCCCUCUU +>52 SEQ52#1#101#+ ORIGID RF00006_rep.1_AC005219.1/49914-50014_52 ORIGHEAD RF00006_rep.1 +GGGUCGGAGUUAGCUCAAGCGGUUACCUCCUCAUGCCGGACUUUCUAUCUGUCCAUCUCUGUGCUGGGGUUCGAGACCCGCGGGUGCUUACUGACCCUUUU +>53 SEQ53#1#98#+ ORIGID RF00006_rep.2_AF045143.1/1-98_53 ORIGHEAD RF00006_rep.2 +GGCUGGCUUUAGCUCAGCGGUUACUUCGACAGUUCUUUAAUUGAAACAAGCAACCUGUCUGGGUUGUUCGAGACCCGCGGGCGCUCUCCAGUCCUUUU +>54 SEQ54#1#88#+ ORIGID RF00006_rep.3_AF045144.1/1-88_54 ORIGHEAD RF00006_rep.3 +GGCUGGCUUUAGCUCAGCGGUUACUUCGAGUACAUUGUAACCACCUCUCUGGGUGGUUCGAGACCCGCGGGUGCUUUCCAGCUCUUUU
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/in.model.tree.stk Thu Dec 15 18:19:27 2016 -0500 @@ -0,0 +1,8 @@ +CLUSTAL W --- LocARNA 1.8.10 + +18 GCGUUGGUGGUAUAGUGGUGAGCAUAGCUGCCUUCCAAGCAGUUGA-CCCGGGUUCGAUUCCCGGCCAACGCA +17 GCAUUGGUGGUUCAGUGGU-AGAAUUCUCGCCUCCCACGCGGGAGA-CCCGGGUUCAAUUCCCGGCCAAUGCA +28 GCAUUGGUGGUUCAGUGGU-AGAAUUCUCGCCUGCCACGCGGGAGG-CCCGGGUUCGAUUCCCGGCCAAUGCA +46 UCCCUGGUGGUCUAGUGGUUAGGAUUCGGCGCUCUCACCGCCGCGG-CCCGGGUUCGAUUCCCGGUCAGGGAA +35 UCCCUGGUGGUCUAGUGGCUAGGAUUCGGCGCUUUCACCGCCGCGC-CCCGGGUUCGAUUCCCGGCCAGGAAU +30 GGCUCGUUGGUCUAGGGGU-AUGAUUCUCGCUUAGGGUGCGAGAGGUCCCGGGUUCAAAUCCCGGACGAGCCC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/model.cmfinder.stk Thu Dec 15 18:19:27 2016 -0500 @@ -0,0 +1,107 @@ +# STOCKHOLM 1.0 +#=GF AU CMfinder 0.2 + +#=GS 7 WT 1.00 +#=GS 9 WT 1.00 +#=GS 10 WT 1.00 +#=GS 15 WT 1.00 +#=GS 16 WT 1.00 +#=GS 17 WT 1.00 +#=GS 18 WT 1.00 +#=GS 23 WT 1.00 +#=GS 28 WT 1.00 +#=GS 30 WT 1.00 +#=GS 31 WT 1.00 +#=GS 32 WT 1.00 +#=GS 33 WT 1.00 +#=GS 35 WT 1.00 +#=GS 37 WT 1.00 +#=GS 46 WT 1.00 + +#=GS 7 DE 1.. 83 79.124153 +#=GS 9 DE 1.. 73 96.426384 +#=GS 10 DE 1.. 73 66.977600 +#=GS 15 DE 1.. 73 79.247490 +#=GS 16 DE 1.. 72 74.848335 +#=GS 17 DE 1.. 71 90.305908 +#=GS 18 DE 1.. 72 81.752869 +#=GS 23 DE 1.. 74 80.096992 +#=GS 28 DE 1.. 71 92.405518 +#=GS 30 DE 1.. 72 75.032211 +#=GS 31 DE 1.. 73 95.823235 +#=GS 32 DE 1.. 73 94.321754 +#=GS 33 DE 1.. 72 74.710342 +#=GS 35 DE 1.. 72 71.951927 +#=GS 37 DE 1.. 82 77.369476 +#=GS 46 DE 1.. 72 83.877258 + +7 GUCAGGAUGGCCGAG-CGGUCUAAGGCGCUGCGUUCAGGUCGCAGUCUCC +#=GR 7 SS <<<<<<<.....................<<<<<.......>>>>>..... +9 GCUUCUGUAGUGUAG-UGGU-UAUCACGUUCGCCUCACACGCGAA----- +#=GR 9 SS <<<<<<<.....................<<<<<.......>>>>>..... +10 GCCCGGCUAGCUCAGUCGGU--AGAGCAUGAGACUCUUAAUCUCA----- +#=GR 10 SS <<<<<<<.....................<<<<<.......>>>>>..... +15 GCCGAAAUAGCUCAGUUGGG--AGAGCGUUAGACUGAAGAUCUAA----- +#=GR 15 SS <<<<<<<.....................<<<<<.......>>>>>..... +16 GGGGAUGUAGCUCAG-UGGU--AGAGCGCAUGCUUCGCAUGUAUG----- +#=GR 16 SS <<<<<<<.....................<<<<<.......>>>>>..... +17 GCAUUGGUGGUUCAG-UGGU--AGAAUUCUCGCCUCCCACGCGGG----- +#=GR 17 SS <<<<<<<.....................<<<<<.......>>>>>..... +18 GCGUUGGUGGUAUAG-UGGU-GAGCAUAGCUGCCUUCCAAGCAGU----- +#=GR 18 SS <<<<<<<.....................<<<<<.......>>>>>..... +23 GGCCGGUUAGCUCAGUUGGU-UAGAGCGUGGUGCUAAUAACGCCA----- +#=GR 23 SS <<<<-<<.....................<<<<<.......>>>>>..... +28 GCAUUGGUGGUUCAG-UGGU--AGAAUUCUCGCCUGCCACGCGGG----- +#=GR 28 SS <<<<<<<.....................<<<<<.......>>>>>..... 
+30 GGCUCGUUGGUCUAG-GGGU--AUGAUUCUCGCUUAGGGUGCGAG----- +#=GR 30 SS <<<<<<<.....................<<<<<.......>>>>>..... +31 GUUUCCGUAGUGUAG-UGGU-UAUCACGUUCGCCUCACACGCGAA----- +#=GR 31 SS <<<<<<<.....................<<<<<.......>>>>>..... +32 GUUUCCGUAGUGUAG-CGGU-UAUCACAUUCGCCUCACACGCGAA----- +#=GR 32 SS <<<<<<<.....................<<<<<.......>>>>>..... +33 UCCUCGUUAGUAUAG-UGGU-GAGUAUCCCCGCCUGUCACGCGGG----- +#=GR 33 SS <<<<<<<.....................<<<<<.......>>>>>..... +35 UCCCUGGUGGUCUAG-UGGC-UAGGAUUCGGCGCUUUCACCGCCG----- +#=GR 35 SS <-<<<<<.....................<<<<<.......>>>>>..... +37 GGUAGCGUGGCCGAG-CGGUCUAAGGCGCUGGAUUUAGGCUCCAGUCUCU +#=GR 37 SS <<<<<<<.....................<<<<<.......>>>>>..... +46 UCCCUGGUGGUCUAG-UGGU-UAGGAUUCGGCGCUCUCACCGCCG----- +#=GR 46 SS <<<<<<<.....................<<<<<.......>>>>>..... +#=GC SS_cons (((((((,,,,,,,,,,,,,,,,,,,,,<<<<<_______>>>>>,,,,, +#=GC RF GuuUuggUAGUuuAGUUGGUCUAGAAcaUUcgcCUcAcAcgcgAAUCUCu + +7 CCUGGAGG-CGUGGGUUCGAAUCCCACUUCUGACA +#=GR 7 SS ..........<<<<<.......>>>>>>>>>>>>. +9 -----AGGUCCCCGGUUCGAAACCGGGCAGAAGCA +#=GR 9 SS ..........<<<<<.......>>>>>>>>>>>>. +10 -----GGGUCGUGGGUUCGAGCCCCACGUUGGGCG +#=GR 10 SS ..........<<<<<.......>>>>>>>>>>>>. +15 -----AGGUCCCUGGUUCGAUCCCGGGUUUCGGCA +#=GR 15 SS ..........<<<<<.......>>>>>>>>>>>>. +16 -----AGGCCCCGGGUUCGAUCCCCGGCAUCUCCA +#=GR 16 SS ..........<<<<<.......>>>>>>>>>>>>. +17 -----AGA-CCCGGGUUCAAUUCCCGGCCAAUGCA +#=GR 17 SS ..........<<<<<.......>>>>>>>>>>>>. +18 -----UGA-CCCGGGUUCGAUUCCCGGCCAACGCA +#=GR 18 SS ..........<<<<<.......>>>>>>>>>>>>. +23 -----AGGUCGCGGGUUCGAUCCCCGUACGGGCCA +#=GR 23 SS ..........<<<<<.......>>>>>>>->>>>. +28 -----AGG-CCCGGGUUCGAUUCCCGGCCAAUGCA +#=GR 28 SS ..........<<<<<.......>>>>>>>>>>>>. +30 -----AGGUCCCGGGUUCAAAUCCCGGACGAGCCC +#=GR 30 SS ..........<<<<<.......>>>>>>>>>>>>. +31 -----AGGUCCCCGGUUCGAAACCGGGCGGAAACA +#=GR 31 SS ..........<<<<<.......>>>>>>>>>>>>. 
+32 -----AGGUCCCCGGUUCGAUCCCGGGCGGAAACA +#=GR 32 SS ..........<<<<<.......>>>>>>>>>>>>. +33 -----AGA-CCGGGGUUCGAUUCCCCGACGGGGAG +#=GR 33 SS ..........<<<<<.......>>>>>>>>>>>>. +35 -----CGC-CCCGGGUUCGAUUCCCGGCCAGGAAU +#=GR 35 SS ..........<<<<<.......>>>>>>>>>>->. +37 UCGG-AGG-CGUGGGUUCGAAUCCCACCGCUGCCA +#=GR 37 SS ..........<<<<<.......>>>>>>>>>>>>. +46 -----CGG-CCCGGGUUCGAUUCCCGGUCAGGGAA +#=GR 46 SS ..........<<<<<.......>>>>>>>>>>>>. +#=GC SS_cons ,,,,,,,,,,<<<<<_______>>>>>))))))): +#=GC RF uCuGgAGGUCCCgGGUUCGAUUCCcGGccaAaaCA +//