Mercurial > repos > crusoe > khmer

--- a/README.txt	Mon Aug 18 07:02:05 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-Requires galaxy-central dating from 2014-06-30 or newer
-
-https://bitbucket.org/galaxy/galaxy-central/commits/4de240e5a7819c768b8267c19e477530dab54370
-
-
--- a/abundance-dist-single.xml	Mon Aug 18 07:02:05 2014 -0400
+++ b/abundance-dist-single.xml	Tue Jul 07 11:59:39 2015 -0400
@@ -1,18 +1,19 @@
 <tool	id="gedlab-khmer-abundance-dist-single"
 	name="Abundance Distribution (all-in-one)"
-	version="1.1-1"
-	force_history_refresh="true">
-
+	version="2.0rc1-1">
+
 	<description>
-		Calculate abundance distribution of the k-mers in a given sequence
-		file.
+		Calculate abundance distribution of the k-mers in a given
+		sequence file.
 	</description>
 	<macros>
 		<token name="@BINARY@">abundance-dist-single.py</token>
 		<import>macros.xml</import>
 	</macros>
 	<expand macro="requirements" />
-	<command>
+	<expand macro="stdio" />
+	<expand macro="version" />
+	<command><![CDATA[
 ## The command is a Cheetah template which allows some Python based syntax.
 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
 mkdir output; cd output;
@@ -27,7 +28,8 @@
 --squash
 @THREADS@
 $input_sequence_filename
-$output_histogram_filename
+$output_histogram_filename
+]]>
 	</command>

 	<inputs>
@@ -51,9 +53,8 @@
 			label="${tool.name} k-mer counting table">
 			<filter>save_countingtable == True</filter>
 		</data>
-		<expand macro="abundance-histogram-output" />
+		<expand macro="abundance-histogram-output" />
 	</outputs>
-	<expand macro="stdio" />
     <tests>
 	    <test>
 		    <param name="input_sequence_filename" value="test-abund-read-2.fa" />
@@ -86,13 +87,20 @@
 	    </test>

     </tests>
+    <help><![CDATA[
+Calculate the abundance distribution of k-mers from a single sequence file.
+
+Note that with :option:`-b` this script is constant memory; in exchange,
+k-mer counts will stop at 255. The memory usage of this script with
+:option:`-b` will be about 1.15x the product of the :option:`-x` and
+:option:`-N` numbers.
+
+To count k-mers in multiple files use :program:`load-into-counting.py` and
+:program:`abundance-dist.py`.
+]]>
+    </help>
     <citations>
 	<expand macro="software-citation" />
 	<expand macro="counting-citation" />
     </citations>
-    <!-- [OPTIONAL] Help displayed in Galaxy -->
-    <!--
-	<help>
-	</help>
-    -->
 </tool>
--- a/abundance-dist.xml	Mon Aug 18 07:02:05 2014 -0400
+++ b/abundance-dist.xml	Tue Jul 07 11:59:39 2015 -0400
@@ -1,8 +1,7 @@
 <tool	id="gedlab-khmer-abundance-dist"
 	name="Abundance Distribution"
-	version="1.1-1"
-	force_history_refresh="true">
-
+	version="2.0rc1-1">
+
 	<description>
 		Calculate abundance distribution of the k-mers in a given sequence
 		file using a pre-made k-mer counting table.
@@ -11,8 +10,10 @@
 		<token name="@BINARY@">abundance-dist.py</token>
 		<import>macros.xml</import>
         </macros>
-        <expand macro="requirements" />
-	<command>
+	<expand macro="requirements" />
+	<expand macro="stdio" />
+	<expand macro="version" />
+	<command><![CDATA[
 ## The command is a Cheetah template which allows some Python based syntax.
 ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
 mkdir output; cd output;
@@ -20,18 +21,18 @@
 --squash
 $input_counting_table_filename
 $input_sequence_filename
-$output_histogram_filename
+$output_histogram_filename
+]]>
 	</command>

 	<inputs>
+		<expand macro="input_counting_table_filename" />
 		<expand macro="input_sequence_filename" />
-		<expand macro="input_counting_table_filename" />
 		<expand macro="input_zero" />
 	</inputs>
 	<outputs>
 		<expand macro="abundance-histogram-output" />
 	</outputs>
-	<expand macro="stdio" />
 	<tests>
 		<test>
                     	<param name="input_sequence_filename" value="test-abund-read-2.fa" />
@@ -47,7 +48,10 @@
 		<test>
                     	<param name="input_sequence_filename" value="test-abund-read-2.fa" />
  			<param name="input_counting_table_filename" value="test-abund-read-2.nobigcount.ct" ftype="ct" />
- 	                <param name="no_zero" value="false" />
+			<param name="no_zero" value="false" />
+			<assert_stderr>
+				<has_line_matching expression="WARNING: The loaded graph has bigcount" />
+			</assert_stderr>
                     	<output name="output_histogram_filename">
                             	<assert_contents>
                                     	<has_line_matching expression="1 96 96 0.98" />
@@ -56,13 +60,13 @@
                     	</output>
             	</test>
     	</tests>
+	<help><![CDATA[
+Calculate abundance distribution of the k-mers in the sequence file using a
+pre-made k-mer counting table.
+]]>
+	</help>
 	<citations>
 		<expand macro="software-citation" />
 		<expand macro="counting-citation" />
 	</citations>
-    <!-- [OPTIONAL] Help displayed in Galaxy -->
-    <!--
-	<help>
-	</help>
-    -->
 </tool>
--- a/count-median.xml	Mon Aug 18 07:02:05 2014 -0400
+++ b/count-median.xml	Tue Jul 07 11:59:39 2015 -0400
@@ -1,8 +1,7 @@
 <tool	id="gedlab-khmer-count-median"
 	name="Count Median"
-	version="1.1-1"
-	force_history_refresh="true">
-
+	version="2.0rc1-1">
+
 	<description>
 		Count the median/avg k-mer abundance for each sequence in the
 		input file, based on the k-mer counts in the given k-mer
@@ -13,12 +12,15 @@
 		<token name="@BINARY@">count-median.py</token>
 		<import>macros.xml</import>
         </macros>
-        <expand macro="requirements" />
-	<command>
+	<expand macro="requirements" />
+	<expand macro="stdio" />
+	<expand macro="version" />
+	<command><![CDATA[
 @BINARY@
 $input_counting_table_filename
 $input_sequence_filename
 $output_summary_filename
+]]>
 	</command>

 	<inputs>
@@ -26,12 +28,11 @@
 		<expand macro="input_counting_table_filename" />
 	</inputs>
 	<outputs>
-		<data name="output_summary_filename" format="text"
+		<data name="output_summary_filename" format="txt"
 			label="${input_sequence_filename} sequence id, median, average, stddev, and seq length" />
 	</outputs>
-	<expand macro="stdio" />
 	<tests>
-		<test interactor="api">
+		<test>
 			<param name="input_sequence_filename"
 				value="test-abund-read-2.fa" />
 			<param name="input_counting_table_filename"
@@ -46,13 +47,18 @@
                         </output>
 		</test>
 	</tests>
+	<help>
+Count the median/avg k-mer abundance for each sequence in the input file,
+based on the k-mer counts in the given k-mer counting table. Can be used to
+estimate expression levels (mRNAseq) or coverage (genomic/metagenomic). The
+output file contains sequence id, median, average, stddev, and seq length;
+fields are separated by spaces. For khmer 1.x, count-median.py will split
+sequence names at the first space, which means that some sequence formats (e.g.
+paired FASTQ in Casava 1.8 format) will yield uninformative names. Use
+:option:`--csv` to fix this behavior.
+	</help>
 	<citations>
 		<expand macro="software-citation" />
 		<expand macro="diginorm-citation" />
 	</citations>
-    <!-- [OPTIONAL] Help displayed in Galaxy -->
-    <!--
-	<help>
-	</help>
-    -->
 </tool>
--- a/datatypes_conf.xml	Mon Aug 18 07:02:05 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-<?xml version="1.0"?>
-<datatypes>
-    <datatype_files>
-        <datatype_file name="gedlab.py"/>
-    </datatype_files>
-  <registration>
-    <datatype extension="ct" type="galaxy.datatypes.gedlab:Count" mimetype="application/octet-stream" display_in_upload="true"/>
-    <datatype extension="pt" type="galaxy.datatypes.gedlab:Presence" mimetype="application/octet-stream" display_in_upload="true"/>
-  </registration>
-  <sniffers>
-    <sniffer type="galaxy.datatypes.gedlab:Count"/>
-    <sniffer type="galaxy.datatypes.gedlab:Presence"/>
-  </sniffers>
-</datatypes>
--- a/do-partition.xml	Mon Aug 18 07:02:05 2014 -0400
+++ b/do-partition.xml	Tue Jul 07 11:59:39 2015 -0400
@@ -1,8 +1,7 @@
 <tool	id="gedlab-khmer-do-partition"
 	name="Sequence partition all-in-one"
-	version="1.1-1"
-	force_history_refresh="true">
-
+	version="2.0rc1-1">
+
 	<description>
 		Load, partition, and annotate FAST[AQ] sequences
 	</description>
@@ -10,8 +9,10 @@
                 <token name="@BINARY@">do-parition.py</token>
                 <import>macros.xml</import>
         </macros>
-        <expand macro="requirements" />
-	<command>
+	<expand macro="requirements" />
+	<expand macro="stdio" />
+	<expand macro="version" />
+	<command><![CDATA[
 mkdir -p output;
 @BINARY@
 @TABLEPARAMS@
@@ -22,6 +23,7 @@
 #end for ;
 mv output.info $infomation ;
 mv *.part output/
+]]>
 	</command>

 	<inputs>
@@ -48,60 +50,31 @@
 			label="${tool.name} summary for #echo ','.join(map(str, $inputs ))#" />
 		<expand macro="output_sequences" />
 	</outputs>
-	<expand macro="stdio" />
-<!--	<tests>
-		<test interactor="api">
-			<conditional name="parameters">
-				<param name="type" value="specific" />
-				<param name="inputs" value="test-abund-read-2.fa"/>
-				<param name="cutoff" value="1" />
-				<param name="ksize" value="17" />
-			</conditional>
+	<tests>
+		<test>
+                        <param name="inputs" value="random-20-a.fa"/>
 			<output name="output">
-				<discover_dataset name="test-abund-read-2.fa.keep">
-					<assert_contents>
-						<has_line_matching expression="GGTTGACGGGGCTCAGGGGG" />
-					</assert_contents>
-				</discover_dataset>
-			</output>
-		</test>
-		<test interactor="api">
-			<param name="inputs" value="test-abund-read-2.fa" />
-			<param name="cutoff" value="2" />
-			<param name="ksize" value="17" />
-			<output name="output">
-				<discover_dataset name="test-abund-read-2.fa.keep">
+				<discovered_dataset designation="random-20-a.fa.part">
 					<assert_contents>
-						<has_line_matching expression="GGTTGACGGGGCTCAGGGGG" />
-						<has_line_matching expression="GGTTGACGGGGCTCAGGG" />
+						<has_text text='>35     2' />
 					</assert_contents>
-				</discover_dataset>
-			</output>
-		</test>
-		<test interactor="api">
-			<param name="inputs" value="test-abund-read-paired.fa" />
-			<param name="cutoff" value="1" />
-			<param name="ksize" value="17" />
-			<param name="paired" value="true" />
-			<output name="output">
-				<discover_dataset name="test-abund-read-paired.fa.keep">
-					<assert_contents>
-						<has_line_matching expression="GGTTGACGGGGCTCAGGGGG" />
-						<has_line_matching expression="GGTTGACGGGGCTCAGGG" />
-					</assert_contents>
-				</discover_dataset>
-			</output>
-		</test>
+				</discovered_dataset>
+                        </output>
+                </test>
+	</tests>
+	<help><![CDATA[
+Load in a set of sequences, partition them, merge the partitions, and
+annotate the original sequence files with the partition information.

-	</tests>
-    -->
+This script combines the functionality of :program:`load-graph.py`,
+:program:`partition-graph.py`, :program:`merge-partitions.py`, and
+:program:`annotate-partitions.py` into one script. This is convenient
+but should probably not be used for large data sets, because
+:program:`do-partition.py` doesn't provide save/resume functionality.
+]]>
+	</help>
 	<citations>
 		<expand macro="software-citation" />
 		<expand macro="graph-citation" />
 	</citations>
-    <!-- [OPTIONAL] Help displayed in Galaxy -->
-    <!--
-	<help>
-	</help>
-    -->
 </tool>
--- a/extract-partitions.xml	Mon Aug 18 07:02:05 2014 -0400
+++ b/extract-partitions.xml	Tue Jul 07 11:59:39 2015 -0400
@@ -1,8 +1,7 @@
 <tool	id="gedlab-khmer-extract-partitions"
 	name="Extract partitions"
-	version="1.1-1"
-	force_history_refresh="true">
-
+	version="2.0rc1-1">
+
 	<description>
 		Separate sequences that are annotated with partitions into
 		grouped files.
@@ -11,19 +10,23 @@
                 <token name="@BINARY@">extract-partitions.py</token>
                 <import>macros.xml</import>
         </macros>
-        <expand macro="requirements" />
-	<command>
+	<expand macro="requirements" />
+	<expand macro="stdio" />
+	<expand macro="version" />
+	<command><![CDATA[
 mkdir -p output ;
 cd output ;
 @BINARY@
 --max-size $max_size
 --min-partition-size $min_partition_size
-$output_unasssigned
+$output_unassigned
 output
 #for input in $inputs
 $input
-#end for ;
+#end for
+;
 mv output.dist $distribution
+]]>
 	</command>

 	<inputs>
@@ -51,27 +54,25 @@
 			label="Partition size distribution from ${tool.name}" />
 		<expand macro="output_sequences" />
 	</outputs>
-	<expand macro="stdio" />
-
+
 	<tests>
-		<test interactor="api">
+		<test>
 			<param name="inputs" value="random-20-a.fa.part"/>
 			<output name="distribution">
 				<assert_contents>
 					<has_line_matching
-						expression="99 1 1 99" />
+						expression="90 1 3 98" />
 				</assert_contents>
 			</output>
 		</test>

 	</tests>
+	<help><![CDATA[
+Separate sequences that are annotated with partitions into grouped files.
+]]>
+	</help>
 	<citations>
 		<expand macro="software-citation" />
 		<expand macro="graph-citation" />
 	</citations>
-    <!-- [OPTIONAL] Help displayed in Galaxy -->
-    <!--
-	<help>
-	</help>
-    -->
 </tool>
--- a/filter-abund.xml	Mon Aug 18 07:02:05 2014 -0400
+++ b/filter-abund.xml	Tue Jul 07 11:59:39 2015 -0400
@@ -1,8 +1,7 @@
 <tool	id="gedlab-khmer-filter-abund"
 	name="Filter by abundance"
-	version="1.1-1"
-	force_history_refresh="true">
-
+	version="2.0rc1-1">
+
 	<description>
 		Trims fastq/fasta sequences at k-mers of a given abundance
 		based on a provided k-mer counting table.
@@ -12,7 +11,9 @@
 		<import>macros.xml</import>
 	</macros>
 	<expand macro="requirements" />
-	<command>
+	<expand macro="stdio" />
+	<expand macro="version" />
+	<command><![CDATA[
 mkdir output; cd output;
 @BINARY@
 #if $cutoff != 2
@@ -25,6 +26,7 @@
  $input
 #end for
 --out $output
+]]>
 	</command>

 	<inputs>
@@ -41,15 +43,14 @@
 			value="2"
 			label="cutoff"
 			help="Trim at k-mers below this abundance. (--cutoff)" />
-		<expand macro="input_counting_table_filename" />
+		<expand macro="input_counting_table_filename" />
 	</inputs>
 	<outputs>
 		<!-- <expand macro="output_sequences" /> -->
 		<expand macro="output_sequences_single" />
 	</outputs>
-	<expand macro="stdio" />
 	<tests>
-                <test interactor="api">
+                <test>
                         <param name="inputs" value="test-abund-read-2.fa" />
 			<param name="input_counting_table_filename"
 				value="test-abund-read-2.ct" ftype="ct" />
@@ -61,7 +62,7 @@
 				<!-- </discover_dataset> -->
                         </output>
                 </test>
-                <test interactor="api">
+                <test>
 			<param name="input_sequence_filename"
 				value="test-abund-read-2.fa" />
 			<param name="input_counting_table_filename"
@@ -75,14 +76,17 @@
 				<!-- </discover_dataset> -->
                         </output>
                 </test>
-        </tests>
+        </tests>
+	<help><![CDATA[
+Trim sequences at a minimum k-mer abundance.
+
+Trimmed sequences will be placed in ${input_sequence_filename}.abundfilt
+for each input sequence file. If the input sequences are from RNAseq or
+metagenome sequencing then :option:`--variable-coverage` should be used.
+]]>
+	</help>
 	<citations>
 		<expand macro="software-citation" />
 		<expand macro="counting-citation" />
 	</citations>
-	<!-- [OPTIONAL] ReST Help displayed in Galaxy -->
-    <!--
-	<help>
-	</help>
-    -->
 </tool>
--- a/filter-below-abund.py	Mon Aug 18 07:02:05 2014 -0400
+++ b/filter-below-abund.py	Tue Jul 07 11:59:39 2015 -0400
@@ -1,12 +1,12 @@
-#! /usr/bin/env python2
+#! /usr/bin/env python
 #
-# This file is part of khmer, http://github.com/ged-lab/khmer/, and is
-# Copyright (C) Michigan State University, 2009-2013. It is licensed under
-# the three-clause BSD license; see doc/LICENSE.txt.
+# This file is part of khmer, https://github.com/dib-lab/khmer/, and is
+# Copyright (C) Michigan State University, 2009-2015. It is licensed under
+# the three-clause BSD license; see LICENSE.
 # Contact: khmer-project@idyll.org
 #
+from __future__ import print_function
 import sys
-import screed.fasta
 import os
 import khmer
 from khmer.thread_utils import ThreadedSequenceProcessor, verbose_fasta_iter
@@ -23,17 +23,17 @@
     counting_ht = sys.argv[1]
     infiles = sys.argv[2:]

-    print 'file with ht: %s' % counting_ht
-    print '-- settings:'
-    print 'N THREADS', WORKER_THREADS
-    print '--'
+    print('file with ht: %s' % counting_ht)
+    print('-- settings:')
+    print('N THREADS', WORKER_THREADS)
+    print('--')

-    print 'making hashtable'
+    print('making hashtable')
     ht = khmer.load_counting_hash(counting_ht)
     K = ht.ksize()

     for infile in infiles:
-        print 'filtering', infile
+        print('filtering', infile)
         outfile = os.path.basename(infile) + '.below'

         outfp = open(outfile, 'w')
--- a/filter-below-abund.xml	Mon Aug 18 07:02:05 2014 -0400
+++ b/filter-below-abund.xml	Tue Jul 07 11:59:39 2015 -0400
@@ -1,7 +1,6 @@
 <tool	id="gedlab-khmer-filter-below-abund"
 	name="Filter below abundance cutoff of 50"
-	version="1.1-1"
-	force_history_refresh="true">
+	version="2.0rc1-1">

 <!-- Work in progress, gating on filter-below-abund.py being upgraded -->
 	<description>
@@ -13,6 +12,8 @@
 		<import>macros.xml</import>
 	</macros>
 	<expand macro="requirements" />
+	<expand macro="stdio" />
+	<expand macro="version" />
 	<command>
 mkdir output; cd output;
 @BINARY@
@@ -24,15 +25,14 @@

 	<inputs>
 		<expand macro="input_sequences_filenames" />
-		<expand macro="input_counting_table_filename" />
+		<expand macro="input_counting_table_filename" />
 	</inputs>
 	<outputs>
 		<!-- <expand macro="output_sequences" /> -->
 		<expand macro="output_sequences_single" />
 	</outputs>
-	<expand macro="stdio" />
 	<!--        <tests>
-                <test interactor="api">
+                <test>
                         <param name="inputs" value="test-abund-read-2.fa" />
                         <param name="input_counting_table_filename" value="test-abund-read-2.ct" ftype="ct" />
                         <output name="output">
@@ -40,7 +40,7 @@
 				</discover_dataset>
                         </output>
                 </test>
-                <test interactor="api">
+                <test>
                         <param name="input_sequence_filename" value="test-abund-read-2.fa" />
                         <param name="input_counting_table_filename" value="test-abund-read-2.ct" ftype="ct" />
 			<param name="cutoff" value="1" />
@@ -61,5 +61,5 @@
     <!--
 	<help>
 	</help>
-    -->
+    -->
 </tool>
--- a/gedlab.py	Mon Aug 18 07:02:05 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-"""
-k-mer count and presence
-"""
-
-from galaxy.datatypes.binary import Binary
-
-import os
-import logging
-
-log = logging.getLogger(__name__)
-
-
-class Count(Binary):
-
-    def __init__(self, **kwd):
-        Binary.__init__(self, **kwd)
-
-
-class Presence(Binary):
-
-    def __init__(self, **kwd):
-        Binary.__init__(self, **kwd)
-
-Binary.register_unsniffable_binary_ext("ct")
-Binary.register_unsniffable_binary_ext("pt")
--- a/macros.xml	Mon Aug 18 07:02:05 2014 -0400
+++ b/macros.xml	Tue Jul 07 11:59:39 2015 -0400
@@ -1,19 +1,20 @@
 <macros>
 	<xml name="requirements">
 		<requirements>
-			<!-- <requirement type="binary">@BINARY@</requirement> -->
-			<requirement type="package" version="1.1">khmer</requirement>
+			<requirement type="package" version="2.0rc1">khmer</requirement>
 		</requirements>
+	</xml>
+	<xml name="version">
 		<version_command>@BINARY@ --version</version_command>
 	</xml>
 	<token name="@TABLEPARAMS@">#if $parameters.type == "simple"
   --ksize=20
   --n_tables=4
-  --min-tablesize=$parameters.tablesize
+  --max-tablesize=$parameters.tablesize
 #else
   --ksize=$parameters.ksize
   --n_tables=$parameters.n_tables
-  --min-tablesize=$parameters.tablesize_specific
+  --max-tablesize="$parameters.tablesize_specific"
   #end if</token>
 	<token name="@THREADS@">--threads \${GALAXY_SLOTS:-4}</token>
 	<xml name="tableinputs">
@@ -93,7 +94,7 @@
 	</xml>
 	<xml name="abundance-histogram-output">
                 <data   name="output_histogram_filename"
-                        format="text"
+                        format="txt"
                         label="${tool.name} k-mer abundance histogram. The
                         columns are: (1) k-mer abundance, (2) k-mer count, (3)
                         cumulative count, (4) fraction of total distinct
@@ -102,14 +103,14 @@
 	</xml>
 	<xml name="output_sequences">
                 <data   name="output"
-                        format="input"
+                        format_source="inputs"
                         label="${tool.name} processed nucleotide sequence file">
                         <discover_datasets pattern="__name__" directory="output" visible="true"/>
                 </data>
 	</xml>
 	<xml name="output_sequences_single">
                 <data   name="output"
-                        format="input"
+                        format_source="input_sequence_filename"
                         label="${tool.name} processed nucleotide sequence file" />
 	</xml>
 	<xml name="input_zero">
@@ -152,7 +153,6 @@
 	</xml>
 	<xml name="stdio">
 	<stdio>
-        <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
 		<exit_code	range="1:"
 				level="fatal" />
 	</stdio>
--- a/normalize-by-median.xml	Mon Aug 18 07:02:05 2014 -0400
+++ b/normalize-by-median.xml	Tue Jul 07 11:59:39 2015 -0400
@@ -1,8 +1,7 @@
 <tool	id="gedlab-khmer-normalize-by-median"
 	name="Normalize By Median"
-	version="1.1-4"
-	force_history_refresh="true">
-
+	version="2.0rc1-1">
+
 	<description>
 		Filters a fastq/fasta file using digital normalization via
 	    	median k-mer abundances.
@@ -12,7 +11,9 @@
                 <import>macros.xml</import>
         </macros>
         <expand macro="requirements" />
-	<command>
+	<expand macro="stdio" />
+	<expand macro="version" />
+	<command><![CDATA[
 mkdir output;
 cd output;
 normalize-by-median.py
@@ -32,8 +33,8 @@
 #end for
 #end for
 --out=$output
+]]>
 	</command>
-
 	<inputs>
 		<repeat name="many_inputs" title="input(s) set" min="1" default="1">
 			<expand macro="input_sequences_filenames" />
@@ -44,24 +45,25 @@
 			truevalue="--paired"
 			falsevalue=""
 			label="Are the inputs interleaved paired ends?"
-			help="If so, then selecting this option will process the paired ends together." />
+			help="(--paired) If so, then selecting this option will process the paired ends together." />

 		<param	name="countingtable_to_load"
 			type="data"
 			format="ct"
 			optional="true"
 			label="an optional k-mer counting table to load"
-			help="The inputs file(s) will be processed using the kmer counts in the specified k-mer counting table file as a starting point." />
+			help="(--loadtable) The inputs file(s) will be processed using the kmer counts in the specified k-mer counting table file as a starting point." />

 		<param	name="save_countingtable"
 			type="boolean"
 			label="Save the k-mer counting table(s) in a file"
-			help="" />
+			help="(--savetable)" />
 		<param	name="cutoff"
 			type="integer"
 			min="1"
 			value="20"
-			label="cutoff" />
+			label="cutoff"
+			help="(--cutoff)"/>
 		<expand macro="tableinputs" />
 	</inputs>
 	<outputs>
@@ -73,10 +75,8 @@
 		<!-- <expand macro="output_sequences" /> -->
 		<expand macro="output_sequences_single" />
 	</outputs>
-	<expand macro="stdio" />
-
 	<tests>
-		<test interactor="api">
+		<test>
 			<conditional name="parameters">
 				<param name="type" value="specific" />
 				<param name="inputs" value="test-abund-read-2.fa"/>
@@ -91,7 +91,7 @@
 				</discover_dataset>
 			</output>
 		</test>
-		<test interactor="api">
+		<test>
 			<param name="inputs" value="test-abund-read-2.fa" />
 			<param name="cutoff" value="2" />
 			<param name="ksize" value="17" />
@@ -104,7 +104,7 @@
 				</discover_dataset>
 			</output>
 		</test>
-		<test interactor="api">
+		<test>
 			<param name="inputs" value="test-abund-read-paired.fa" />
 			<param name="cutoff" value="1" />
 			<param name="ksize" value="17" />
@@ -120,13 +120,31 @@
 		</test>

 	</tests>
+	<help><![CDATA[
+Do digital normalization (remove mostly redundant sequences)
+
+Discard sequences based on whether or not their median k-mer abundance lies
+above a specified cutoff. Kept sequences will be placed in <fileN>.keep.
+
+Paired end reads will be considered together if :option:`-p` is set. If
+either read will be kept, then both will be kept. This should result in
+keeping (or discarding) each sequencing fragment. This helps with retention
+of repeats, especially.
+
+With :option:`-s`/:option:`--savetable`, the k-mer counting table
+will be saved to the specified file after all sequences have been
+processed. With :option:`-d`, the k-mer counting table will be
+saved every d files for multifile runs; if :option:`-s` is set,
+the specified name will be used, and if not, the name `backup.ct`
+will be used.  :option:`-l`/:option:`--loadtable` will load the
+specified k-mer counting table before processing the specified
+files.  Note that these tables are in the same format as those
+produced by :program:`load-into-counting.py` and consumed by
+:program:`abundance-dist.py`.
+]]>
+	</help>
 	<citations>
 		<expand macro="software-citation" />
 		<expand macro="diginorm-citation" />
 	</citations>
-    <!-- [OPTIONAL] Help displayed in Galaxy -->
-    <!--
-	<help>
-	</help>
-    -->
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/out	Tue Jul 07 11:59:39 2015 -0400
@@ -0,0 +1,2 @@
+1 96 96 0.98
+1001 2 98 1.0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/out2	Tue Jul 07 11:59:39 2015 -0400
@@ -0,0 +1,2 @@
+1 96 96 0.98
+255 2 98 1.0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml	Tue Jul 07 11:59:39 2015 -0400
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<repositories description="This requires the Count and Presence Table datatype definitions.">
+	    <repository changeset_revision="08a714ff4ea5" name="oxli_datatypes" owner="crusoe" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+</repositories>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/random-20-a.fa	Tue Jul 07 11:59:39 2015 -0400
@@ -0,0 +1,198 @@
+>35
+CGCAGGCTGGATTCTAGAGGCAGAGGTGAGCTATAAGATATTGCATACGTTGAGCCAGC
+>16
+CGGAAGCCCAATGAGTTGTCAGAGTCACCTCCACCCCGGGCCCTGTTAGCTACGTCCGT
+>46
+GGTCGTGTTGGGTTAACAAAGGATCCCTGACTCGATCCAGCTGGGTAGGGTAACTATGT
+>40
+GGCTGAAGGAGCGGGCGTACGTGTTTACGGCATGATGGCCGGTGATTATGGGGGACGGG
+>33
+GCAGCGGCTTTGAATGCCGAATATATAACAGCGACGGGGTTCAATAAGCTGCACATGCG
+>98
+ACCAGATGCATAGCCCAACAGCTGAGACATTCCCAGCTCGCGAACCAAGACGTGAGAGC
+>17
+CCCTGTTAGCTACGTCCGTCTAAGGATATTAACATAGTTGCGACTGCGTCCTGTGCTCA
+>89
+GCGAGATACTAGCAAAGGTTCATCAACAGCTACACCCGACGAACCCCGAGAAATTGGGA
+>30
+GTTATGGTCCAGGATGAATGCGCGTACCGGGCGCCTATCACTCCTCTTGTCATTCAGAA
+>82
+ATGCACTATATTTAAGAGGTCTAGAGTGTAAAAAGTGTACCCTTCGGGGTGGAGCTGTT
+>60
+GTTTTTGTCATCGTGCATAAAGCGGGACAGAGTTCAACGGTATTCGAATGCACACCCTA
+>83
+CCTTCGGGGTGGAGCTGTTAATGAACTCAAGTGGCGATGGAGGCTAAAACGATACGTTG
+>12
+AGCCAATTGTAACCATATGGTATCCAGTTTCCGTAGCAGCAATGCGCGACGGGCAATCG
+>85
+CGTGATATGATTACTAAAGGGGCCCGCAAAAACCCATTCACTGAGGGCTCTGTCCGTAC
+>2
+CCCGTGGGGCGGGCTAATTTTAAAGGCAGGTTGCTACACGTCAACTCTACCCAAGCTCC
+>45
+ATACGCCACTCGACTTGGCTCGCCCTCGATCTAAAATAGCGGTCGTGTTGGGTTAACAA
+>11
+GCAGCAGACCAACATCCAACACTTTTCACAAGAGGCTGACAGCCAATTGTAACCATATG
+>39
+CAATTGACTTCCATGTGGGTCGGCTGTCAAGTCTAAACCGGGCTGAAGGAGCGGGCGTA
+>26
+AACATCTTAACCTCTGATCCCAACATGAGGGACATGAGTTTTCAAAGTAACGATGCGCA
+>75
+GTCGGTGCCCGCGTGCGGAGCAGTCTTGATCCGGCGCGCTCTTACCTATGGTCGGCACG
+>81
+GGCTACTGGTTGATAAGCGTACGTAAAAGGCGAGTCTTACATGCACTATATTTAAGAGG
+>97
+ATTAGTGTGACTAGCCGAGTGCCCCAGCGTTTATCCAATGACCAGATGCATAGCCCAAC
+>13
+AATGCGCGACGGGCAATCGCGTCTGCGTTGATCGTCGCCCCTATTGTCGCTCCCTTAGT
+>92
+ATCAGGGCAAATTTGCTCGTGACTAAATGGTAATACTACCCGGGACAGTAAACTTTTGG
+>56
+AGATCTGCTTGGGTGTATCCCCATTCAGAGATACCAGATCTAAGCGACCATCAGAAACA
+>61
+TATTCGAATGCACACCCTAACATACTGGAAGATTCACTCTATATACCGGGAACTACTAA
+>96
+ATTAGACCGCTATCAACTCTTGCGAGGAAGGTCTGGGCCTATTAGTGTGACTAGCCGAG
+>31
+CTCCTCTTGTCATTCAGAAGGAATTTGATTAATTACCTGGGCTGACTCGCGCCCCCTGC
+>29
+TGGAAGCGCCCTCCGCTCAGGCGTTTTAGTAGATCCCAGTGTTATGGTCCAGGATGAAT
+>54
+TGGATGAGGTCCTTAAGGCCTAATTGACCAATCGCCCCAAGATTGGTGGTGAATGACTC
+>0
+TAGTGATCAGCGGCTAGTGTCGCCCCTCTTAGCACCTTGCGATCATCGAATCGGGCTGT
+>90
+GAACCCCGAGAAATTGGGAAGCCTGGAGGCAGTACAGTCATCCAGTCTGCTGCTCAAAG
+>34
+TCAATAAGCTGCACATGCGTGGTTGTGGCACGATCAGTTCCGCAGGCTGGATTCTAGAG
+>43
+AGGACTCGACGTCCGCCCCATGCTTGAGAGAAGGTTTCGGCCAACCATGGTAGGTTAGG
+>8
+ACACACAAGGCCAGACACCAACTTGGCCGTGGAATTTATCAACACTTCTGAGACGAAGG
+>37
+TGTGCGCTGTGAGATACAACTATAGGCACCGGGTTGCTGGCTAATAACCATTTAGAGTC
+>51
+ACACAATGGACGCGTTAAGGAGAACCGGTCGCAACCAGGTTGAAAATGCCTGATATACG
+>32
+GCTGACTCGCGCCCCCTGCAGGCTGCTATGATTGAGTGCGGCAGCGGCTTTGAATGCCG
+>78
+TCTGGGGCGAGATCCCCTCTGCTCACTTTCTTGTAGTAAATACACCGAAGGGGCGAACC
+>18
+CGACTGCGTCCTGTGCTCAGTTCGTGACGCCGAACTCAAGGACGCGGTACGAAGAACTG
+>36
+TTGCATACGTTGAGCCAGCGCCGCCCGTATACACAGGGTCTGTGCGCTGTGAGATACAA
+>53
+ATATAAGTTTTTTAGATGTAAAAAATTTTTTATGGCGGCCTGGATGAGGTCCTTAAGGC
+>24
+AAGAAACAGGCTAGGTCTTCCATGCAATGGTTCTCACAGTGTAGTCGCGCATCAACTCC
+>7
+AAACGTCTAAGTAATCATGCGACCGGCGCCTCGATTGGACACACACAAGGCCAGACACC
+>9
+AACACTTCTGAGACGAAGGTCATTTACGATTGGGACACTTTCTCGAACTCCGGTTAATT
+>47
+CTGGGTAGGGTAACTATGTAGCCATCGCTCAGTGGATTCTTCCGGGATAGGGTGTGCGA
+>62
+ATATACCGGGAACTACTAAAATTTTGGGCTACTCTATGCTTACAGCCCAACATGCGCAA
+>79
+TACACCGAAGGGGCGAACCCTGTCTACATTCGCAAATGCATCCTACCTGAGAGGCTTCG
+>48
+TCCGGGATAGGGTGTGCGAATGTGCCGGGCATTCAGCTCCTTAGAGACGAGTTACGAGC
+>66
+GGCGCGACCAATATTCATTTGATGAGAATTGAAATCGACTGAATCACGGGATTTATACA
+>25
+GTAGTCGCGCATCAACTCCGCCAGTTTTATCGAAGCGCCCAACATCTTAACCTCTGATC
+>5
+TCATTACGGGGTGTCCATCTAGAGAAAGTGGGTTTCCCTTATAGAAATGAGGAGGATTC
+>72
+ATAAAAAACGACTTCTAAAGCGACACTGGTTTTATCCTTCCCTGTTTTCCTCGCCCCAT
+>76
+CTTACCTATGGTCGGCACGATTCCATTGGCGGATATAGGATTGATTACGTGTGTTTACT
+>69
+GCAGCGAGGTATTTAAACTGTTCAATCGGCGCAACCGAAAATCTGCTACCGTGGTTGCT
+>87
+CAGTATACGCCCGTTGAGAAACAGGTGGTGGCGCAGTGTCGATTACTTCGTAATAATTT
+>27
+TTCAAAGTAACGATGCGCAGATTGAATAATGCCATATCTGCGCGAGAGGTTTCAGGTAC
+>77
+TTGATTACGTGTGTTTACTATACCGGTAGAAGCCTTCAGTTCTGGGGCGAGATCCCCTC
+>95
+TACGTGTGGCATCGTTGCACCCTAATTCGCATTATTAAGTATTAGACCGCTATCAACTC
+>63
+TACAGCCCAACATGCGCAACAACTATAAGCTGCTGCTGACAGATCCGTTTGTTCCGGAC
+>38
+CTAATAACCATTTAGAGTCGCCCGCGGTGATGAGTAATCGCAATTGACTTCCATGTGGG
+>20
+GTGCCTACCGTACCTGTCGAGCCAGTGCGATCAGTAAAACTACCGATTCGTGGCCTCCC
+>88
+GATTACTTCGTAATAATTTGAGGGTGCTGCCGCGTGTTCCGCGAGATACTAGCAAAGGT
+>49
+TTAGAGACGAGTTACGAGCCACTCTTGGATCGTCATGCATACCTCGCAGATCGGCAGAG
+>91
+TCCAGTCTGCTGCTCAAAGTCCATCTACATGTAAAGAACCATCAGGGCAAATTTGCTCG
+>86
+CTGAGGGCTCTGTCCGTACGTGTACTATAGATCCTTGCTCCAGTATACGCCCGTTGAGA
+>42
+CATATTTCAGGCGTGCGCCAACTTACGATTCTTGAATCCAAGGACTCGACGTCCGCCCC
+>70
+ATCTGCTACCGTGGTTGCTTCGACCATGGTAAACTGAGTAAGCCCTTATGAGTTGCGGG
+>19
+GACGCGGTACGAAGAACTGCTCCAGCAACAGCATTCCTTGGTGCCTACCGTACCTGTCG
+>84
+AGGCTAAAACGATACGTTGTATACTAAGAACTGTCTACATCGTGATATGATTACTAAAG
+>52
+TGAAAATGCCTGATATACGAAGATTAAGCGGCTTTGGATCATATAAGTTTTTTAGATGT
+>71
+AGCCCTTATGAGTTGCGGGTCGTGCTGTTAGACTGAACACATAAAAAACGACTTCTAAA
+>93
+CGGGACAGTAAACTTTTGGTGATGCCAGCACGACCAGCGCAGGGTCAAGAAAACTATTA
+>58
+TCGTGGTACACCCGGAGTCTCGAAAGGAGCTTGCAAAGCTTTTCAGCATGGGTCGCATT
+>22
+TTCATTCCCCTGTAACGTTTCGAACTCAACTTGCTTGCCCGACATATGGCGGTACGCGG
+>50
+ACCTCGCAGATCGGCAGAGAACGGTTTGGTCTGTTTGCGTACACAATGGACGCGTTAAG
+>21
+TACCGATTCGTGGCCTCCCGTTCGTCGCAATGAACGGCTTTTCATTCCCCTGTAACGTT
+>73
+CCTGTTTTCCTCGCCCCATGCAATGGTAACTAATATACCGCCCCATAGTCTTAATAACC
+>68
+CTGTCCCAACGGTAACAATGGAGGCACTATACCGACGCTCGCAGCGAGGTATTTAAACT
+>23
+GACATATGGCGGTACGCGGGCTCAGCGCTCCGCCAGTAAGAAGAAACAGGCTAGGTCTT
+>94
+AGGGTCAAGAAAACTATTAATTTAAGCGCTGTTTAGTAACTACGTGTGGCATCGTTGCA
+>10
+TCTCGAACTCCGGTTAATTTGCAATCCGGGGGTTTGCTCAGCAGCAGACCAACATCCAA
+>41
+GGTGATTATGGGGGACGGGTATAGTACTAATAGTTTTGGGCATATTTCAGGCGTGCGCC
+>80
+TCCTACCTGAGAGGCTTCGACTAAAGAATGCGGGTATACTGGCTACTGGTTGATAAGCG
+>64
+AGATCCGTTTGTTCCGGACGGTCGTCGTACCCACCCCTTGTCGATAGGTAAAGGAGTAA
+>57
+TAAGCGACCATCAGAAACACAGCATCAGCTTACCAGCCTTTCGTGGTACACCCGGAGTC
+>1
+GATCATCGAATCGGGCTGTCGCCAAAGGCCGACCAAGGTTCCCGTGGGGCGGGCTAATT
+>55
+GATTGGTGGTGAATGACTCACAAAATGCTCATAGAATATTAGATCTGCTTGGGTGTATC
+>67
+GAATCACGGGATTTATACATCATTTATAGCTAAATTACACCTGTCCCAACGGTAACAAT
+>14
+CTATTGTCGCTCCCTTAGTTGTTGGGCGTAGTCCGCACCTAGAGTCCAACCAGGCCTCG
+>15
+AGAGTCCAACCAGGCCTCGACAATCCTTTGTCCTGTCCCCCGGAAGCCCAATGAGTTGT
+>59
+TTTCAGCATGGGTCGCATTCCTACCTAAGGCTAGGGGCATGTTTTTGTCATCGTGCATA
+>28
+CGCGAGAGGTTTCAGGTACCTATCGGGACAGACTTGTTTCTGGAAGCGCCCTCCGCTCA
+>74
+CCCCATAGTCTTAATAACCGACACCGAGACGCTACATGGCGTCGGTGCCCGCGTGCGGA
+>4
+TGTAACCTGTGTGGGGTCGGTCCTGGGGAAACTTTGGGTTTCATTACGGGGTGTCCATC
+>65
+TCGATAGGTAAAGGAGTAAGCGTCCGACTCCCTCTTACTTGGCGCGACCAATATTCATT
+>6
+ATAGAAATGAGGAGGATTCACAGACACGTCAGTCACCATCAAACGTCTAAGTAATCATG
+>44
+CCAACCATGGTAGGTTAGGAAAGCCGCCAAATAAGTTCTTATACGCCACTCGACTTGGC
+>3
+TCAACTCTACCCAAGCTCCTTGCATCTCGGTACCCCCCCTTGTAACCTGTGTGGGGTCG
Binary file test-data/test-abund-read-2.ct has changed
--- a/test-data/test-abund-read-2.ct.info	Mon Aug 18 07:02:05 2014 -0400
+++ b/test-data/test-abund-read-2.ct.info	Tue Jul 07 11:59:39 2015 -0400
@@ -1,2 +1,3 @@
-through end: test-abund-read-2.fa
+through test-data/test-abund-read-2.fa
 fp rate estimated to be 0.000
+
Binary file test-data/test-abund-read-2.nobigcount.ct has changed
--- a/test-data/test-abund-read-2.nobigcount.ct.info	Mon Aug 18 07:02:05 2014 -0400
+++ b/test-data/test-abund-read-2.nobigcount.ct.info	Tue Jul 07 11:59:39 2015 -0400
@@ -1,2 +1,3 @@
-through end: test-abund-read-2.fa
+through test-data/test-abund-read-2.fa
 fp rate estimated to be 0.000
+
--- a/test-data/test-abund-read-paired.fa	Mon Aug 18 07:02:05 2014 -0400
+++ b/test-data/test-abund-read-paired.fa	Tue Jul 07 11:59:39 2015 -0400
@@ -2,11 +2,11 @@
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 >895:1:37:17593:9954/2
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+>895:1:37:17593:9954 1::FOO
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
+>895:1:37:17593:9954 2::FOO
+GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 >895:1:37:17593:9954/1
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
 >895:1:37:17593:9954/2
 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
->895:1:37:17593:9954/1
-GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
->895:1:37:17593:9954/2
-GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
--- a/tool_dependencies.xml	Mon Aug 18 07:02:05 2014 -0400
+++ b/tool_dependencies.xml	Tue Jul 07 11:59:39 2015 -0400
@@ -1,9 +1,14 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="khmer" version="1.1">
+    <package name="khmer" version="2.0rc1">
         <install version="1.0">
-            <actions>
-                <action type="shell_command">easy_install -U setuptools==3.4.1; pip install --user khmer==1.1 || pip install khmer==1.1</action>
+	    <actions>
+		    <action type="setup_python_environment">
+			    <repository changeset_revision="44e7542c51e6" name="package_python_2_7" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu">
+				<package name="python" version="2.7" />
+			</repository>
+			<package>https://pypi.python.org/packages/source/k/khmer/khmer-2.0rc1.tar.gz#md5=d8ea5e3ba34de0380007c74d61fc6d1a</package>
+		</action>
             </actions>
         </install>
     </package>