changeset 3:7fb926851f66 draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/blob/master/tool_collections/kraken/kraken_filter/ commit de7140295cce07e1bc1697e51dab4271c8d7a8a6
author devteam
date Fri, 18 Dec 2015 19:41:19 -0500
parents 317726be0703
children d2c12af1943a
files kraken-filter.xml test-data/kraken_filter_test1.tab test-data/kraken_filter_test1_output.tab test-data/kraken_test1.fa test-data/test_database.loc test-data/test_db/database.idx test-data/test_db/database.kdb test-data/test_db/taxonomy/names.dmp test-data/test_db/taxonomy/nodes.dmp tool_data_table_conf.xml.test
diffstat 10 files changed, 183 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/kraken-filter.xml	Wed Jul 15 15:22:22 2015 -0400
+++ b/kraken-filter.xml	Fri Dec 18 19:41:19 2015 -0500
@@ -5,6 +5,9 @@
     <macros>
         <import>macros.xml</import>
     </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
     <command>
         <![CDATA[
         @SET_DATABASE_PATH@ &&
@@ -19,6 +22,15 @@
     <outputs>
         <data format="tabular" name="filtered_output" />
     </outputs>
+    <tests>
+        <test><!-- expected file is the input rows with a P= score column added; output name must match the data element declared in outputs -->
+            <param name="input" value="kraken_filter_test1.tab"/>
+            <param name="threshold" value="0"/>
+            <param name="kraken_database" value="test_db"/>
+            <output name="filtered_output" file="kraken_filter_test1_output.tab" ftype="tabular"/>
+        </test>
+    </tests>
+
     <help>
 <![CDATA[
 
@@ -47,8 +59,5 @@
 In this case, ID #561 is the parent node of #562. Here, a label of #562 for this sequence would have a score of C/Q = (13+3)/(13+4+1+3) = 16/21. A label of #561 would have a score of C/Q = (13+4+3)/(13+4+1+3) = 20/21. If a user specified a threshold over 16/21, kraken-filter would adjust the original label from #562 to #561; if the threshold was greater than 20/21, the sequence would become unclassified.
     ]]>
     </help>
-    <expand macro="version_command" />
-    <expand macro="requirements" />
-    <expand macro="stdio" />
     <expand macro="citations" />
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kraken_filter_test1.tab	Fri Dec 18 19:41:19 2015 -0500
@@ -0,0 +1,4 @@
+C	gi|145231|gb|M33724.1|ECOALPHOA	83333	171	83333:162
+C	gi|145232|gb|M33725.1|ECOALPHOB	83333	183	83333:174
+C	gi|145234|gb|M33727.1|ECOALPHOE	83333	97	83333:88
+C	gi|146195|gb|J01619.1|ECOGLTA	83333	3850	83333:3841
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kraken_filter_test1_output.tab	Fri Dec 18 19:41:19 2015 -0500
@@ -0,0 +1,4 @@
+C	gi|145231|gb|M33724.1|ECOALPHOA	83333	171	P=1.000	83333:162
+C	gi|145232|gb|M33725.1|ECOALPHOB	83333	183	P=1.000	83333:174
+C	gi|145234|gb|M33727.1|ECOALPHOE	83333	97	P=1.000	83333:88
+C	gi|146195|gb|J01619.1|ECOGLTA	83333	3850	P=1.000	83333:3841
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/kraken_test1.fa	Fri Dec 18 19:41:19 2015 -0500
@@ -0,0 +1,70 @@
+>gi|145231|gb|M33724.1|ECOALPHOA Escherichia coli K-12 truncated PhoA (phoA) gene, partial cds; and transposon Mu dI, partial sequence
+CAAAGCTCCGGGCCTCACCCAGGCGCTAAATACCAAAGATGGCGCAGTGATGGTGATGAGTTACGGGAAC
+TCCGAAGAGGATTCACAAGAACATACCGGCAGTCAGTTGCGTATTGCGGCGTATGGCCCGCATGCCGCCA
+ATGAAGCGGCGCACGAAAAACGCGAAAGCGT
+
+>gi|145232|gb|M33725.1|ECOALPHOB Escherichia coli K12 phoA pseudogene and transposon Mu dl-R, partial sequence
+CTGTCATAAAGTTGTCACGGCCGAGACTTATAGTCGCTTTGTTTTTATTTTTTAATGTATTTGTACATGG
+AGAAAATAAAGTGAAACAAAGCACTATTGCACTGGCACTCTTACCGTTACTGTTTACCCCTGTGACAAAA
+GCCCGGACACCAGTGAAGCGGCGCACGAAAAACGCGAAAGCGT
+
+>gi|145234|gb|M33727.1|ECOALPHOE Escherichia coli K12 upstream sequence of psiA5::Mu dI. is identical to psiA30 upstream sequence; putative (phoA) pseudogene and transposon Mu dl-R, partial sequence
+TTGTTTTTATTTTTTAATGTATTTGTACATGGAGAAAATAAAGTGAAACAAAGCACTATTGCACTGGTGA
+AGCGGCGCACGAAAAACGCGAAAGCGT
+
+>gi|146195|gb|J01619.1|ECOGLTA Eschericia coli gltA gene, sdhCDAB operon and sucABCD operons, complete sequence
+GAATTCGACCGCCATTGCGCAAGGCATCGCCATGACCAGGCAGGATACAAAAGAGAGTCGATAAATATTC
+ACGGTGTCCATACCTGATAAATATTTTATGAAAGGCGGCGATGATGCCGCAAAATAATACTTATTTATAA
+TCCAGCACGTAGGTTGCGTTAGCGGTTACTTCACCTGCCGTGACATCGACTGCATTATCAATTTGTTCCA
+TCCAGGCGAAAAAGTTCAGCGTCTGTTCTGATGAGCTTGCATCCAGGTCAAGATCTGGCGCGGCTGAACC
+TAATACGATGTTACCGTCATTTTTGTCCATCAGTCGTACACCGACCCCAGTTGCTTCGCCTGCACTGGTG
+TTGCTCAACAAAGGCGTAGCACCAGTTGTCTTAGCCGTGCTATCGAAGGTTACGCCAAACTTTGGATACC
+GGCATTCCGCTACCGTTGTCAGAAGCAGGCAGATCACAGTTGATCAAGCGAATGTCGACGGCCACTTTAT
+TGCTATGATGCTCCCGGTTTATATGGGTTGTCGTGACTTGTCCAAGATCTATGTTTTTATCAATATCTTC
+TGGATGAATTTCACAAGGTGCTTCAATAACCTCCCCCTTAAAGTGAATTTCGCCAGAACCTTCATCAGCA
+GCATAAACAGGTGCAGTGAACAGCAGAGATACGGCCAGTGCGGCCAATGTTTTTTGTCCTTTAAACATAA
+CAGAGTCCTTTAAGGATATAGAATAGGGGTATAGCTACGCCAGAATATCGTATTTGATTATTGCTAGTTT
+TTAGTTTTGCTTAAAAAATATTGTTAGTTTTATTAAATTGGAAAACTAAATTATTGGTATCATGAATTGT
+TGTATGATGATAAATATAGGGGGGATATGATAGACGTCATTTTCATAGGGTTATAAAATGCGACTACCAT
+GAAGTTTTTAATTCAAAGTATTGGGTTGCTGATAATTTGAGCTGTTCTATTCTTTTTAAATATCTATATA
+GGTCTGTTAATGGATTTTATTTTTACAAGTTTTTTGTGTTTAGGCATATAAAAATCAAGCCCGCCATATG
+AACGGCGGGTTAAAATATTTACAACTTAGCAATCGAACCATTAACGCTTGATATCGCTTTTAAAGTCGCG
+TTTTTCATATCCTGTATACAGCTGACGCGGACGGGCAATCTTCATACCGTCACTGTGCATTTCGCTCCAG
+TGGGCGATCCAGCCAACGGTACGTGCCATTGCGAAAATGACGGTGAACATGGAAGACGGAATACCCATCG
+CTTTCAGGATGATACCAGAGTAGAAATCGACGTTCGGGTACAGTTTCTTCTCGATAAAGTACGGGTCGTT
+CAGCGCGATGTTTTCCAGCTCCATAGCCACTTCCAGCAGGTCATCCTTCGTGCCCAGCTCTTTCAGCACT
+TCATGGCAGGTTTCACGCATTACGGTGGCGCGCGGGTCGTAATTTTTGTACACGCGGTGACCGAAGCCCA
+TCAGGCGGAAAGAATCATTTTTGTCTTTCGCACGACGAAAAAATTCCGGAATGTGTTTAACGGAGCTGAT
+TTCTTCCAGCATTTTCAGCGCCGCTTCGTTAGCACCGCCGTGCGCAGGTCCCCACAGTGAAGCAATACCT
+GCTGCGATACAGGCAAACGGGTTCGCACCCGAAGAGCCAGCGGTACGCACGGTGGAGGTAGAGGCGTTCT
+GTTCATGGTCAGCGTGCAGGATCAGAATACGGTCCATAGCACGTTCCAGAATCGGATTAACTTCATACGG
+TTCGCACGGCGTGGAGAACATCATATTCAGGAAGTTACCGGCGTAGGAGAGATCGTTGCGCGGGTAAACA
+AATGGCTGACCAATGGAATACTTGTAACACATCGCGGCCATGGTCGGCATTTTCGACAGCAGGCGGAACG
+CGGCAATTTCACGGTGACGAGGATTGTTAACATCCAGCGAGTCGTGATAGAACGCCGCCAGCGCGCCGGT
+AATACCACACATGACTGCCATTGGATGCGAGTCGCGACGGAAAGCATGGAACAGACGGGTAATCTGCTCG
+TGGATCATGGTATGACGGGTCACCGTAGTTTTAAATTCGTCATACTGTTCCTGAGTCGGTTTTTCACCAT
+TCAGCAGGATGTAACAAACTTCCAGGTAGTTAGAATCGGTCGCCAGCTGATCGATCGGGAAACCGCGGTG
+CAGCAAAATACCTTCATCACCATCAATAAAAGTAATTTTAGATTCGCAGGATGCGGTTGAAGTGAAGCCT
+GGGTCAAAGGTGAACACACCTTTTGAACCGAGAGTACGGATATCAATAACATCTTGACCCAGCGTGCCTT
+TCAGCACATCCAGTTCAACAGCTGTATCCCCGTTGAGGGTGAGTTTTGCTTTTGTATCAGCCATTTAAGG
+TCTCCTTAGCGCCTTATTGCGTAAGACTGCCGGAACTTAAATTTGCCTTCGCACATCAACCTGGCTTTAC
+CCGTTTTTTATTTGGCTCGCCGCTCTGTGAAAGAGGGGAAAACCTGGGTACAGAGCTCTGGGCGCTTGCA
+GGTAAAGGATCCATTGATGACGAATAAATGGCGAATCAAGTACTTAGCAATCCGAATTATTAAACTTGTC
+TACCACTAATAACTGTCCCGAATGAATTGGTCAATACTCCACACTGTTACATAAGTTAATCTTAGGTGAA
+ATACCGACTTCATAACTTTTACGCATTATATGCTTTTCCTGGTAATGTTTGTAACAACTTTGTTGAATGA
+TTGTCAAATTAGATGATTAAAAATTAAATAAATGTTGTTATCGTGACCTGGATCACTGTTCAGGATAAAA
+CCCGACAAACTATATGTAGGTTAATTGTAATGATTTTGTGAACAGCCTATACTGCCGCCAGTCTCCGGAA
+CACCCTGCAATCCCGAGCCACCCAGCGTTGTAACGTGTCGTTTTCGCATCTGGAAGCAGTGTTTTGCATG
+ACGCGCAGTTATAGAAAGGACGCTGTCTGACCCGCAAGCAGACCGGAGGAAGGAAATCCCGACGTCTCCA
+GGTAACAGAAAGTTAACCTCTGTGCCCGTAGTCCCCAGGGAATAATAAGAACAGCATGTGGGCGTTATTC
+ATGATAAGAAATGTGAAAAAACAAAGACCTGTTAATCTGGACCTACAGACCATCCGGTTCCCCATCACGG
+CGATAGCGTCCATTCTCCATCGCGTTTCCGGTGTGATCACCTTTGTTGCAGTGGGCATCCTGCTGTGGCT
+TCTGGGTACCAGCCTCTCTTCCCCTGAAGGTTTCGAGCAAGCTTCCGCGATTATGGGCAGCTTCTTCGTC
+AAATTTATCATGTGGGGCATCCTTACCGCTCTGGCGTATCACGTCGTCGTAGGTATTCGCCACATGATGA
+TGGATTTTGGCTATCTGGAAGAAACATTCGAAGCGGGTAAACGCTCCGCCAAAATCTCCTTTGTTATTAC
+TGTCGTGCTTTCACTTCTCGCAGGAGTCCTCGTATGGTAAGCAACGCCTCCGCATTAGGACGCAATGGCG
+TACATGATTTCATCCTCGTTCGCGCTACCGCTATCGTCCTGACGCTCTACATCATTTATATGGTCGGTTT
+TTTCGCTACCAGTGGCGAGCTGACATATGAAGTCTGGATCGGTTTCTTCGCCTCTGCGTTCACCAAAGTG
+TTCACCCTGCTGGCGCTGTTTTCTATCTTGATCCATGCCTGGATCGGCATGTGGCAGGTGTTGACCGACT
+ACGTTAAACCGCTGGCTTTGCGCCTGATGCTGCAACTGGTGATTGTCGTTGCACTGGTGGTTTACGTGAT
+TTATGGATTCGTTGTGGTGTGGGGTGTGTGATGAAATTGCCAGTCAGAGAATTTGATGCAGTTGTGATTG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_database.loc	Fri Dec 18 19:41:19 2015 -0500
@@ -0,0 +1,1 @@
+test_db	test_db	${__HERE__}
\ No newline at end of file
Binary file test-data/test_db/database.idx has changed
Binary file test-data/test_db/database.kdb has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_db/taxonomy/names.dmp	Fri Dec 18 19:41:19 2015 -0500
@@ -0,0 +1,74 @@
+83333	|	Escherichia coli K-12	|		|	scientific name	|
+83333	|	Escherichia coli K12	|		|	equivalent name	|
+562	|	"Bacillus coli" Migula 1895	|		|	authority	|
+562	|	"Bacterium coli commune" Escherich 1885	|		|	authority	|
+562	|	"Bacterium coli" (Migula 1895) Lehmann and Neumann 1896	|		|	authority	|
+562	|	ATCC 11775	|		|	type material	|
+562	|	Bacillus coli	|		|	synonym	|
+562	|	Bacterium coli	|		|	synonym	|
+562	|	Bacterium coli commune	|		|	synonym	|
+562	|	CCUG 24	|		|	type material	|
+562	|	CCUG 29300	|		|	type material	|
+562	|	CIP 54.8	|		|	type material	|
+562	|	DSM 30083	|		|	type material	|
+562	|	Enterococcus coli	|		|	synonym	|
+562	|	Escherchia coli	|		|	misspelling	|
+562	|	Escherichia coli	|		|	scientific name	|
+562	|	Escherichia coli (Migula 1895) Castellani and Chalmers 1919	|		|	authority	|
+562	|	Escherichia sp. MAR	|		|	includes	|
+562	|	Escherichia/Shigella coli	|		|	equivalent name	|
+562	|	Eschericia coli	|		|	misspelling	|
+562	|	JCM 1649	|		|	type material	|
+562	|	LMG 2092	|		|	type material	|
+562	|	NBRC 102203	|		|	type material	|
+562	|	NCCB 54008	|		|	type material	|
+562	|	NCTC 9001	|		|	type material	|
+562	|	bacterium 10a	|		|	includes	|
+562	|	bacterium E3	|		|	includes	|
+561	|	Escherchia	|		|	misspelling	|
+561	|	Escherichia	|		|	scientific name	|
+561	|	Escherichia Castellani and Chalmers 1919	|		|	authority	|
+543	|	Enterobacteraceae	|		|	synonym	|
+543	|	Enterobacteraceae (ex Lapage 1979) Lapage 1982, fam. nov., nom. rev.	|		|	synonym	|
+543	|	Enterobacteriaceae	|		|	scientific name	|
+543	|	Enterobacteriaceae (ex Rahn 1937) Ewing et al. 1980, fam. nov., nom. rev.	|		|	synonym	|
+543	|	Enterobacteriaceae Rahn 1937	|		|	synonym	|
+543	|	gamma-3 proteobacteria	|	gamma-3 proteobacteria <#1>	|	in-part	|
+91347	|	'Enterobacteriales'	|		|	synonym	|
+91347	|	Enterobacteriaceae and related endosymbionts	|		|	synonym	|
+91347	|	Enterobacteriaceae group	|		|	synonym	|
+91347	|	Enterobacteriales	|		|	scientific name	|
+91347	|	enterobacteria	|	enterobacteria<blast91347>	|	blast name	|
+91347	|	gamma-3 proteobacteria	|	gamma-3 proteobacteria <#5>	|	in-part	|
+1236	|	Gammaproteobacteria	|		|	scientific name	|
+1236	|	Gammaproteobacteria Garrity et al. 2005	|		|	synonym	|
+1236	|	Proteobacteria gamma subdivision	|		|	synonym	|
+1236	|	Purple bacteria, gamma subdivision	|		|	synonym	|
+1236	|	g-proteobacteria	|	gamma proteos<blast1236>	|	blast name	|
+1236	|	gamma proteobacteria	|		|	synonym	|
+1236	|	gamma subdivision	|		|	synonym	|
+1236	|	gamma subgroup	|		|	synonym	|
+1224	|	Proteobacteria	|		|	scientific name	|
+1224	|	Proteobacteria Garrity et al. 2005	|		|	authority	|
+1224	|	Proteobacteria [class] Stackebrandt et al. 1988	|		|	authority	|
+1224	|	not Proteobacteria Cavalier-Smith 2002	|		|	authority	|
+1224	|	proteobacteria	|	proteobacteria<blast1224>	|	blast name	|
+1224	|	purple bacteria	|		|	common name	|
+1224	|	purple bacteria and relatives	|		|	common name	|
+1224	|	purple non-sulfur bacteria	|		|	common name	|
+1224	|	purple photosynthetic bacteria	|		|	common name	|
+1224	|	purple photosynthetic bacteria and relatives	|		|	common name	|
+2	|	Bacteria	|	Bacteria <prokaryote>	|	scientific name	|
+2	|	Monera	|	Monera <Bacteria>	|	in-part	|
+2	|	Procaryotae	|	Procaryotae <Bacteria>	|	in-part	|
+2	|	Prokaryota	|	Prokaryota <Bacteria>	|	in-part	|
+2	|	Prokaryotae	|	Prokaryotae <Bacteria>	|	in-part	|
+2	|	bacteria	|	bacteria <blast2>	|	blast name	|
+2	|	eubacteria	|		|	genbank common name	|
+2	|	not Bacteria Haeckel 1894	|		|	synonym	|
+2	|	prokaryote	|	prokaryote <Bacteria>	|	in-part	|
+2	|	prokaryotes	|	prokaryotes <Bacteria>	|	in-part	|
+1	|	all	|		|	synonym	|
+1	|	root	|		|	scientific name	|
+131567	|	biota	|		|	synonym	|
+131567	|	cellular organisms	|		|	scientific name	|
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_db/taxonomy/nodes.dmp	Fri Dec 18 19:41:19 2015 -0500
@@ -0,0 +1,10 @@
+83333	|	562	|	no rank	|		|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+562	|	561	|	species	|	EC	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+561	|	543	|	genus	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+543	|	91347	|	family	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+91347	|	1236	|	order	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+1236	|	1224	|	class	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+1224	|	2	|	phylum	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+2	|	131567	|	superkingdom	|		|	0	|	0	|	11	|	0	|	0	|	0	|	0	|	0	|		|
+131567	|	1	|	no rank	|		|	8	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+1	|	1	|	no rank	|		|	8	|	0	|	1	|	0	|	0	|	0	|	0	|	0	|		|
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Fri Dec 18 19:41:19 2015 -0500
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- Test-only data table: Kraken databases listed in test_database.loc, one per row as value, name, path -->
+    <table name="kraken_databases" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/test-data/test_database.loc" />
+    </table>
+</tables>