Mercurial > repos > crusoe > khmer
changeset 60:fe697e0cb24a draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit d8e0950d53e504e02ee5db43c0804142b14d7fd2-dirty
author | crusoe |
---|---|
date | Tue, 07 Jul 2015 11:59:39 -0400 |
parents | 08a599cf71d0 |
children | |
files | README.txt abundance-dist-single.xml abundance-dist.xml count-median.xml datatypes_conf.xml do-partition.xml extract-partitions.xml filter-abund.xml filter-below-abund.py filter-below-abund.xml gedlab.py macros.xml normalize-by-median.xml out out2 repository_dependencies.xml test-data/random-20-a.fa test-data/test-abund-read-2.ct test-data/test-abund-read-2.ct.info test-data/test-abund-read-2.fa.ct test-data/test-abund-read-2.nobigcount.ct test-data/test-abund-read-2.nobigcount.ct.info test-data/test-abund-read-paired.fa tool_dependencies.xml |
diffstat | 23 files changed, 405 insertions(+), 222 deletions(-) [+] |
line wrap: on
line diff
--- a/README.txt Mon Aug 18 07:02:05 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -Requires galaxy-central dating from 2014-06-30 or newer - -https://bitbucket.org/galaxy/galaxy-central/commits/4de240e5a7819c768b8267c19e477530dab54370 - -
--- a/abundance-dist-single.xml Mon Aug 18 07:02:05 2014 -0400 +++ b/abundance-dist-single.xml Tue Jul 07 11:59:39 2015 -0400 @@ -1,18 +1,19 @@ <tool id="gedlab-khmer-abundance-dist-single" name="Abundance Distribution (all-in-one)" - version="1.1-1" - force_history_refresh="true"> - + version="2.0rc1-1"> + <description> - Calculate abundance distribution of the k-mers in a given sequence - file. + Calculate abundance distribution of the k-mers in a given + sequence file. </description> <macros> <token name="@BINARY@">abundance-dist-single.py</token> <import>macros.xml</import> </macros> <expand macro="requirements" /> - <command> + <expand macro="stdio" /> + <expand macro="version" /> + <command><![CDATA[ ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces mkdir output; cd output; @@ -27,7 +28,8 @@ --squash @THREADS@ $input_sequence_filename -$output_histogram_filename +$output_histogram_filename +]]> </command> <inputs> @@ -51,9 +53,8 @@ label="${tool.name} k-mer counting table"> <filter>save_countingtable == True</filter> </data> - <expand macro="abundance-histogram-output" /> + <expand macro="abundance-histogram-output" /> </outputs> - <expand macro="stdio" /> <tests> <test> <param name="input_sequence_filename" value="test-abund-read-2.fa" /> @@ -86,13 +87,20 @@ </test> </tests> + <help><![CDATA[ +Calculate the abundance distribution of k-mers from a single sequence file. + +Note that with :option:`-b` this script is constant memory; in exchange, +k-mer counts will stop at 255. The memory usage of this script with +:option:`-b` will be about 1.15x the product of the :option:`-x` and +:option:`-N` numbers. + +To count k-mers in multiple files use :program:`load_into_counting.py` and +:program:`abundance_dist.py`. 
+]]> + </help> <citations> <expand macro="software-citation" /> <expand macro="counting-citation" /> </citations> - <!-- [OPTIONAL] Help displayed in Galaxy --> - <!-- - <help> - </help> - --> </tool>
--- a/abundance-dist.xml Mon Aug 18 07:02:05 2014 -0400 +++ b/abundance-dist.xml Tue Jul 07 11:59:39 2015 -0400 @@ -1,8 +1,7 @@ <tool id="gedlab-khmer-abundance-dist" name="Abundance Distribution" - version="1.1-1" - force_history_refresh="true"> - + version="2.0rc1-1"> + <description> Calculate abundance distribution of the k-mers in a given sequence file using a pre-made k-mer counting table. @@ -11,8 +10,10 @@ <token name="@BINARY@">abundance-dist.py</token> <import>macros.xml</import> </macros> - <expand macro="requirements" /> - <command> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version" /> + <command><![CDATA[ ## The command is a Cheetah template which allows some Python based syntax. ## Lines starting hash hash are comments. Galaxy will turn newlines into spaces mkdir output; cd output; @@ -20,18 +21,18 @@ --squash $input_counting_table_filename $input_sequence_filename -$output_histogram_filename +$output_histogram_filename +]]> </command> <inputs> + <expand macro="input_counting_table_filename" /> <expand macro="input_sequence_filename" /> - <expand macro="input_counting_table_filename" /> <expand macro="input_zero" /> </inputs> <outputs> <expand macro="abundance-histogram-output" /> </outputs> - <expand macro="stdio" /> <tests> <test> <param name="input_sequence_filename" value="test-abund-read-2.fa" /> @@ -47,7 +48,10 @@ <test> <param name="input_sequence_filename" value="test-abund-read-2.fa" /> <param name="input_counting_table_filename" value="test-abund-read-2.nobigcount.ct" ftype="ct" /> - <param name="no_zero" value="false" /> + <param name="no_zero" value="false" /> + <assert_stderr> + <has_line_matching expression="WARNING: The loaded graph has bigcount" /> + </assert_stderr> <output name="output_histogram_filename"> <assert_contents> <has_line_matching expression="1 96 96 0.98" /> @@ -56,13 +60,13 @@ </output> </test> </tests> + <help><![CDATA[ +Calculate abundance distribution of the k-mers in the sequence 
file using a +pre-made k-mer counting table. +]]> + </help> <citations> <expand macro="software-citation" /> <expand macro="counting-citation" /> </citations> - <!-- [OPTIONAL] Help displayed in Galaxy --> - <!-- - <help> - </help> - --> </tool>
--- a/count-median.xml Mon Aug 18 07:02:05 2014 -0400 +++ b/count-median.xml Tue Jul 07 11:59:39 2015 -0400 @@ -1,8 +1,7 @@ <tool id="gedlab-khmer-count-median" name="Count Median" - version="1.1-1" - force_history_refresh="true"> - + version="2.0rc1-1"> + <description> Count the median/avg k-mer abundance for each sequence in the input file, based on the k-mer counts in the given k-mer @@ -13,12 +12,15 @@ <token name="@BINARY@">count-median.py</token> <import>macros.xml</import> </macros> - <expand macro="requirements" /> - <command> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version" /> + <command><![CDATA[ @BINARY@ $input_counting_table_filename $input_sequence_filename $output_summary_filename +]]> </command> <inputs> @@ -26,12 +28,11 @@ <expand macro="input_counting_table_filename" /> </inputs> <outputs> - <data name="output_summary_filename" format="text" + <data name="output_summary_filename" format="txt" label="${input_sequence_filename} sequence id, median, average, stddev, and seq length" /> </outputs> - <expand macro="stdio" /> <tests> - <test interactor="api"> + <test> <param name="input_sequence_filename" value="test-abund-read-2.fa" /> <param name="input_counting_table_filename" @@ -46,13 +47,18 @@ </output> </test> </tests> + <help> +Count the median/avg k-mer abundance for each sequence in the input file, +based on the k-mer counts in the given k-mer counting table. Can be used to +estimate expression levels (mRNAseq) or coverage (genomic/metagenomic). The +output file contains sequence id, median, average, stddev, and seq length; +fields are separated by spaces. For khmer 1.x count-median.py will split +sequence names at the first space which means that some sequence formats (e.g. +paired FASTQ in Casava 1.8 format) will yield uninformative names. Use +:option:`--csv` to fix this behavior. 
+ </help> <citations> <expand macro="software-citation" /> <expand macro="diginorm-citation" /> </citations> - <!-- [OPTIONAL] Help displayed in Galaxy --> - <!-- - <help> - </help> - --> </tool>
--- a/datatypes_conf.xml Mon Aug 18 07:02:05 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,14 +0,0 @@ -<?xml version="1.0"?> -<datatypes> - <datatype_files> - <datatype_file name="gedlab.py"/> - </datatype_files> - <registration> - <datatype extension="ct" type="galaxy.datatypes.gedlab:Count" mimetype="application/octet-stream" display_in_upload="true"/> - <datatype extension="pt" type="galaxy.datatypes.gedlab:Presence" mimetype="application/octet-stream" display_in_upload="true"/> - </registration> - <sniffers> - <sniffer type="galaxy.datatypes.gedlab:Count"/> - <sniffer type="galaxy.datatypes.gedlab:Presence"/> - </sniffers> -</datatypes>
--- a/do-partition.xml Mon Aug 18 07:02:05 2014 -0400 +++ b/do-partition.xml Tue Jul 07 11:59:39 2015 -0400 @@ -1,8 +1,7 @@ <tool id="gedlab-khmer-do-partition" name="Sequence partition all-in-one" - version="1.1-1" - force_history_refresh="true"> - + version="2.0rc1-1"> + <description> Load, partition, and annotate FAST[AQ] sequences </description> @@ -10,8 +9,10 @@ <token name="@BINARY@">do-parition.py</token> <import>macros.xml</import> </macros> - <expand macro="requirements" /> - <command> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version" /> + <command><![CDATA[ mkdir -p output; @BINARY@ @TABLEPARAMS@ @@ -22,6 +23,7 @@ #end for ; mv output.info $infomation ; mv *.part output/ +]]> </command> <inputs> @@ -48,60 +50,31 @@ label="${tool.name} summary for #echo ','.join(map(str, $inputs ))#" /> <expand macro="output_sequences" /> </outputs> - <expand macro="stdio" /> -<!-- <tests> - <test interactor="api"> - <conditional name="parameters"> - <param name="type" value="specific" /> - <param name="inputs" value="test-abund-read-2.fa"/> - <param name="cutoff" value="1" /> - <param name="ksize" value="17" /> - </conditional> + <tests> + <test> + <param name="inputs" value="random-20-a.fa"/> <output name="output"> - <discover_dataset name="test-abund-read-2.fa.keep"> - <assert_contents> - <has_line_matching expression="GGTTGACGGGGCTCAGGGGG" /> - </assert_contents> - </discover_dataset> - </output> - </test> - <test interactor="api"> - <param name="inputs" value="test-abund-read-2.fa" /> - <param name="cutoff" value="2" /> - <param name="ksize" value="17" /> - <output name="output"> - <discover_dataset name="test-abund-read-2.fa.keep"> + <discovered_dataset designation="random-20-a.fa.part"> <assert_contents> - <has_line_matching expression="GGTTGACGGGGCTCAGGGGG" /> - <has_line_matching expression="GGTTGACGGGGCTCAGGG" /> + <has_text text='>35 2' /> </assert_contents> - </discover_dataset> - </output> - </test> - <test 
interactor="api"> - <param name="inputs" value="test-abund-read-paired.fa" /> - <param name="cutoff" value="1" /> - <param name="ksize" value="17" /> - <param name="paired" value="true" /> - <output name="output"> - <discover_dataset name="test-abund-read-paired.fa.keep"> - <assert_contents> - <has_line_matching expression="GGTTGACGGGGCTCAGGGGG" /> - <has_line_matching expression="GGTTGACGGGGCTCAGGG" /> - </assert_contents> - </discover_dataset> - </output> - </test> + </discovered_dataset> + </output> + </test> + </tests> + <help><![CDATA[ +Load in a set of sequences, partition them, merge the partitions, and +annotate the original sequence files with the partition information. - </tests> - --> +This script combines the functionality of :program:`load-graph.py`, +:program:`partition-graph.py`, :program:`merge-partitions.py`, and +:program:`annotate-partitions.py` into one script. This is convenient +but should probably not be used for large data sets, because +:program:`do-partition.py` doesn't provide save/resume functionality. +]]> + </help> <citations> <expand macro="software-citation" /> <expand macro="graph-citation" /> </citations> - <!-- [OPTIONAL] Help displayed in Galaxy --> - <!-- - <help> - </help> - --> </tool>
--- a/extract-partitions.xml Mon Aug 18 07:02:05 2014 -0400 +++ b/extract-partitions.xml Tue Jul 07 11:59:39 2015 -0400 @@ -1,8 +1,7 @@ <tool id="gedlab-khmer-extract-partitions" name="Extract partitions" - version="1.1-1" - force_history_refresh="true"> - + version="2.0rc1-1"> + <description> Separate sequences that are annotated with partitions into grouped files. @@ -11,19 +10,23 @@ <token name="@BINARY@">extract-partitions.py</token> <import>macros.xml</import> </macros> - <expand macro="requirements" /> - <command> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version" /> + <command><![CDATA[ mkdir -p output ; cd output ; @BINARY@ --max-size $max_size --min-partition-size $min_partition_size -$output_unasssigned +$output_unassigned output #for input in $inputs $input -#end for ; +#end for +; mv output.dist $distribution +]]> </command> <inputs> @@ -51,27 +54,25 @@ label="Partition size distribution from ${tool.name}" /> <expand macro="output_sequences" /> </outputs> - <expand macro="stdio" /> - + <tests> - <test interactor="api"> + <test> <param name="inputs" value="random-20-a.fa.part"/> <output name="distribution"> <assert_contents> <has_line_matching - expression="99 1 1 99" /> + expression="90 1 3 98" /> </assert_contents> </output> </test> </tests> + <help><![CDATA[ +Separate sequences that are annotated with partitions into grouped files. +]]> + </help> <citations> <expand macro="software-citation" /> <expand macro="graph-citation" /> </citations> - <!-- [OPTIONAL] Help displayed in Galaxy --> - <!-- - <help> - </help> - --> </tool>
--- a/filter-abund.xml Mon Aug 18 07:02:05 2014 -0400 +++ b/filter-abund.xml Tue Jul 07 11:59:39 2015 -0400 @@ -1,8 +1,7 @@ <tool id="gedlab-khmer-filter-abund" name="Filter by abundance" - version="1.1-1" - force_history_refresh="true"> - + version="2.0rc1-1"> + <description> Trims fastq/fasta sequences at k-mers of a given abundance based on a provided k-mer counting table. @@ -12,7 +11,9 @@ <import>macros.xml</import> </macros> <expand macro="requirements" /> - <command> + <expand macro="stdio" /> + <expand macro="version" /> + <command><![CDATA[ mkdir output; cd output; @BINARY@ #if $cutoff != 2 @@ -25,6 +26,7 @@ $input #end for --out $output +]]> </command> <inputs> @@ -41,15 +43,14 @@ value="2" label="cutoff" help="Trim at k-mers below this abundance. (--cutoff)" /> - <expand macro="input_counting_table_filename" /> + <expand macro="input_counting_table_filename" /> </inputs> <outputs> <!-- <expand macro="output_sequences" /> --> <expand macro="output_sequences_single" /> </outputs> - <expand macro="stdio" /> <tests> - <test interactor="api"> + <test> <param name="inputs" value="test-abund-read-2.fa" /> <param name="input_counting_table_filename" value="test-abund-read-2.ct" ftype="ct" /> @@ -61,7 +62,7 @@ <!-- </discover_dataset> --> </output> </test> - <test interactor="api"> + <test> <param name="input_sequence_filename" value="test-abund-read-2.fa" /> <param name="input_counting_table_filename" @@ -75,14 +76,17 @@ <!-- </discover_dataset> --> </output> </test> - </tests> + </tests> + <help><![CDATA[ +Trim sequences at a minimum k-mer abundance. + +Trimmed sequences will be placed in ${input_sequence_filename}.abundfilt +for each input sequence file. If the input sequences are from RNAseq or +metagenome sequencing then :option:`--variable-coverage` should be used. 
+]]> + </help> <citations> <expand macro="software-citation" /> <expand macro="counting-citation" /> </citations> - <!-- [OPTIONAL] ReST Help displayed in Galaxy --> - <!-- - <help> - </help> - --> </tool>
--- a/filter-below-abund.py Mon Aug 18 07:02:05 2014 -0400 +++ b/filter-below-abund.py Tue Jul 07 11:59:39 2015 -0400 @@ -1,12 +1,12 @@ -#! /usr/bin/env python2 +#! /usr/bin/env python # -# This file is part of khmer, http://github.com/ged-lab/khmer/, and is -# Copyright (C) Michigan State University, 2009-2013. It is licensed under -# the three-clause BSD license; see doc/LICENSE.txt. +# This file is part of khmer, https://github.com/dib-lab/khmer/, and is +# Copyright (C) Michigan State University, 2009-2015. It is licensed under +# the three-clause BSD license; see LICENSE. # Contact: khmer-project@idyll.org # +from __future__ import print_function import sys -import screed.fasta import os import khmer from khmer.thread_utils import ThreadedSequenceProcessor, verbose_fasta_iter @@ -23,17 +23,17 @@ counting_ht = sys.argv[1] infiles = sys.argv[2:] - print 'file with ht: %s' % counting_ht - print '-- settings:' - print 'N THREADS', WORKER_THREADS - print '--' + print('file with ht: %s' % counting_ht) + print('-- settings:') + print('N THREADS', WORKER_THREADS) + print('--') - print 'making hashtable' + print('making hashtable') ht = khmer.load_counting_hash(counting_ht) K = ht.ksize() for infile in infiles: - print 'filtering', infile + print('filtering', infile) outfile = os.path.basename(infile) + '.below' outfp = open(outfile, 'w')
--- a/filter-below-abund.xml Mon Aug 18 07:02:05 2014 -0400 +++ b/filter-below-abund.xml Tue Jul 07 11:59:39 2015 -0400 @@ -1,7 +1,6 @@ <tool id="gedlab-khmer-filter-below-abund" name="Filter below abundance cutoff of 50" - version="1.1-1" - force_history_refresh="true"> + version="2.0rc1-1"> <!-- Work in progress, gating on filter-below-abund.py being upgraded --> <description> @@ -13,6 +12,8 @@ <import>macros.xml</import> </macros> <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version" /> <command> mkdir output; cd output; @BINARY@ @@ -24,15 +25,14 @@ <inputs> <expand macro="input_sequences_filenames" /> - <expand macro="input_counting_table_filename" /> + <expand macro="input_counting_table_filename" /> </inputs> <outputs> <!-- <expand macro="output_sequences" /> --> <expand macro="output_sequences_single" /> </outputs> - <expand macro="stdio" /> <!-- <tests> - <test interactor="api"> + <test> <param name="inputs" value="test-abund-read-2.fa" /> <param name="input_counting_table_filename" value="test-abund-read-2.ct" ftype="ct" /> <output name="output"> @@ -40,7 +40,7 @@ </discover_dataset> </output> </test> - <test interactor="api"> + <test> <param name="input_sequence_filename" value="test-abund-read-2.fa" /> <param name="input_counting_table_filename" value="test-abund-read-2.ct" ftype="ct" /> <param name="cutoff" value="1" /> @@ -61,5 +61,5 @@ <!-- <help> </help> - --> + --> </tool>
--- a/gedlab.py Mon Aug 18 07:02:05 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ -""" -k-mer count and presence -""" - -from galaxy.datatypes.binary import Binary - -import os -import logging - -log = logging.getLogger(__name__) - - -class Count(Binary): - - def __init__(self, **kwd): - Binary.__init__(self, **kwd) - - -class Presence(Binary): - - def __init__(self, **kwd): - Binary.__init__(self, **kwd) - -Binary.register_unsniffable_binary_ext("ct") -Binary.register_unsniffable_binary_ext("pt")
--- a/macros.xml Mon Aug 18 07:02:05 2014 -0400 +++ b/macros.xml Tue Jul 07 11:59:39 2015 -0400 @@ -1,19 +1,20 @@ <macros> <xml name="requirements"> <requirements> - <!-- <requirement type="binary">@BINARY@</requirement> --> - <requirement type="package" version="1.1">khmer</requirement> + <requirement type="package" version="2.0rc1">khmer</requirement> </requirements> + </xml> + <xml name="version"> <version_command>@BINARY@ --version</version_command> </xml> <token name="@TABLEPARAMS@">#if $parameters.type == "simple" --ksize=20 --n_tables=4 - --min-tablesize=$parameters.tablesize + --max-tablesize=$parameters.tablesize #else --ksize=$parameters.ksize --n_tables=$parameters.n_tables - --min-tablesize=$parameters.tablesize_specific + --max-tablesize="$parameters.tablesize_specific" #end if</token> <token name="@THREADS@">--threads \${GALAXY_SLOTS:-4}</token> <xml name="tableinputs"> @@ -93,7 +94,7 @@ </xml> <xml name="abundance-histogram-output"> <data name="output_histogram_filename" - format="text" + format="txt" label="${tool.name} k-mer abundance histogram. The columns are: (1) k-mer abundance, (2) k-mer count, (3) cumulative count, (4) fraction of total distinct @@ -102,14 +103,14 @@ </xml> <xml name="output_sequences"> <data name="output" - format="input" + format_source="inputs" label="${tool.name} processed nucleotide sequence file"> <discover_datasets pattern="__name__" directory="output" visible="true"/> </data> </xml> <xml name="output_sequences_single"> <data name="output" - format="input" + format_source="input_sequence_filename" label="${tool.name} processed nucleotide sequence file" /> </xml> <xml name="input_zero"> @@ -152,7 +153,6 @@ </xml> <xml name="stdio"> <stdio> - <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR --> <exit_code range="1:" level="fatal" /> </stdio>
--- a/normalize-by-median.xml Mon Aug 18 07:02:05 2014 -0400 +++ b/normalize-by-median.xml Tue Jul 07 11:59:39 2015 -0400 @@ -1,8 +1,7 @@ <tool id="gedlab-khmer-normalize-by-median" name="Normalize By Median" - version="1.1-4" - force_history_refresh="true"> - + version="2.0rc1-1"> + <description> Filters a fastq/fasta file using digital normalization via median k-mer abundances. @@ -12,7 +11,9 @@ <import>macros.xml</import> </macros> <expand macro="requirements" /> - <command> + <expand macro="stdio" /> + <expand macro="version" /> + <command><![CDATA[ mkdir output; cd output; normalize-by-median.py @@ -32,8 +33,8 @@ #end for #end for --out=$output +]]> </command> - <inputs> <repeat name="many_inputs" title="input(s) set" min="1" default="1"> <expand macro="input_sequences_filenames" /> @@ -44,24 +45,25 @@ truevalue="--paired" falsevalue="" label="Are the inputs interleaved paired ends?" - help="If so, then selecting this option will process the paired ends together." /> + help="(--paired) If so, then selecting this option will process the paired ends together." /> <param name="countingtable_to_load" type="data" format="ct" optional="true" label="an optional k-mer counting table to load" - help="The inputs file(s) will be processed using the kmer counts in the specified k-mer counting table file as a starting point." /> + help="(--loadtable) The inputs file(s) will be processed using the kmer counts in the specified k-mer counting table file as a starting point." 
/> <param name="save_countingtable" type="boolean" label="Save the k-mer counting table(s) in a file" - help="" /> + help="(--savetable)" /> <param name="cutoff" type="integer" min="1" value="20" - label="cutoff" /> + label="cutoff" + help="(--cutoff)"/> <expand macro="tableinputs" /> </inputs> <outputs> @@ -73,10 +75,8 @@ <!-- <expand macro="output_sequences" /> --> <expand macro="output_sequences_single" /> </outputs> - <expand macro="stdio" /> - <tests> - <test interactor="api"> + <test> <conditional name="parameters"> <param name="type" value="specific" /> <param name="inputs" value="test-abund-read-2.fa"/> @@ -91,7 +91,7 @@ </discover_dataset> </output> </test> - <test interactor="api"> + <test> <param name="inputs" value="test-abund-read-2.fa" /> <param name="cutoff" value="2" /> <param name="ksize" value="17" /> @@ -104,7 +104,7 @@ </discover_dataset> </output> </test> - <test interactor="api"> + <test> <param name="inputs" value="test-abund-read-paired.fa" /> <param name="cutoff" value="1" /> <param name="ksize" value="17" /> @@ -120,13 +120,31 @@ </test> </tests> + <help><![CDATA[ +Do digital normalization (remove mostly redundant sequences) + +Discard sequences based on whether or not their median k-mer abundance lies +above a specified cutoff. Kept sequences will be placed in <fileN>.keep. + +Paired end reads will be considered together if :option:`-p` is set. If +either read will be kept, then both will be kept. This should result in +keeping (or discarding) each sequencing fragment. This helps with retention +of repeats, especially. + +With :option:`-s`/:option:`--savetable`, the k-mer counting table +will be saved to the specified file after all sequences have been +processed. With :option:`-d`, the k-mer counting table will be +saved every d files for multifile runs; if :option:`-s` is set, +the specified name will be used, and if not, the name `backup.ct` +will be used. 
:option:`-l`/:option:`--loadtable` will load the +specified k-mer counting table before processing the specified +files. Note that these tables are are in the same format as those +produced by :program:`load-into-counting.py` and consumed by +:program:`abundance-dist.py`. +]]> + </help> <citations> <expand macro="software-citation" /> <expand macro="diginorm-citation" /> </citations> - <!-- [OPTIONAL] Help displayed in Galaxy --> - <!-- - <help> - </help> - --> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/out Tue Jul 07 11:59:39 2015 -0400 @@ -0,0 +1,2 @@ +1 96 96 0.98 +1001 2 98 1.0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/out2 Tue Jul 07 11:59:39 2015 -0400 @@ -0,0 +1,2 @@ +1 96 96 0.98 +255 2 98 1.0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Tue Jul 07 11:59:39 2015 -0400 @@ -0,0 +1,4 @@ +<?xml version="1.0"?> +<repositories description="This requires the Count and Presence Table datatype definitions."> + <repository changeset_revision="08a714ff4ea5" name="oxli_datatypes" owner="crusoe" toolshed="https://testtoolshed.g2.bx.psu.edu" /> +</repositories>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/random-20-a.fa Tue Jul 07 11:59:39 2015 -0400 @@ -0,0 +1,198 @@ +>35 +CGCAGGCTGGATTCTAGAGGCAGAGGTGAGCTATAAGATATTGCATACGTTGAGCCAGC +>16 +CGGAAGCCCAATGAGTTGTCAGAGTCACCTCCACCCCGGGCCCTGTTAGCTACGTCCGT +>46 +GGTCGTGTTGGGTTAACAAAGGATCCCTGACTCGATCCAGCTGGGTAGGGTAACTATGT +>40 +GGCTGAAGGAGCGGGCGTACGTGTTTACGGCATGATGGCCGGTGATTATGGGGGACGGG +>33 +GCAGCGGCTTTGAATGCCGAATATATAACAGCGACGGGGTTCAATAAGCTGCACATGCG +>98 +ACCAGATGCATAGCCCAACAGCTGAGACATTCCCAGCTCGCGAACCAAGACGTGAGAGC +>17 +CCCTGTTAGCTACGTCCGTCTAAGGATATTAACATAGTTGCGACTGCGTCCTGTGCTCA +>89 +GCGAGATACTAGCAAAGGTTCATCAACAGCTACACCCGACGAACCCCGAGAAATTGGGA +>30 +GTTATGGTCCAGGATGAATGCGCGTACCGGGCGCCTATCACTCCTCTTGTCATTCAGAA +>82 +ATGCACTATATTTAAGAGGTCTAGAGTGTAAAAAGTGTACCCTTCGGGGTGGAGCTGTT +>60 +GTTTTTGTCATCGTGCATAAAGCGGGACAGAGTTCAACGGTATTCGAATGCACACCCTA +>83 +CCTTCGGGGTGGAGCTGTTAATGAACTCAAGTGGCGATGGAGGCTAAAACGATACGTTG +>12 +AGCCAATTGTAACCATATGGTATCCAGTTTCCGTAGCAGCAATGCGCGACGGGCAATCG +>85 +CGTGATATGATTACTAAAGGGGCCCGCAAAAACCCATTCACTGAGGGCTCTGTCCGTAC +>2 +CCCGTGGGGCGGGCTAATTTTAAAGGCAGGTTGCTACACGTCAACTCTACCCAAGCTCC +>45 +ATACGCCACTCGACTTGGCTCGCCCTCGATCTAAAATAGCGGTCGTGTTGGGTTAACAA +>11 +GCAGCAGACCAACATCCAACACTTTTCACAAGAGGCTGACAGCCAATTGTAACCATATG +>39 +CAATTGACTTCCATGTGGGTCGGCTGTCAAGTCTAAACCGGGCTGAAGGAGCGGGCGTA +>26 +AACATCTTAACCTCTGATCCCAACATGAGGGACATGAGTTTTCAAAGTAACGATGCGCA +>75 +GTCGGTGCCCGCGTGCGGAGCAGTCTTGATCCGGCGCGCTCTTACCTATGGTCGGCACG +>81 +GGCTACTGGTTGATAAGCGTACGTAAAAGGCGAGTCTTACATGCACTATATTTAAGAGG +>97 +ATTAGTGTGACTAGCCGAGTGCCCCAGCGTTTATCCAATGACCAGATGCATAGCCCAAC +>13 +AATGCGCGACGGGCAATCGCGTCTGCGTTGATCGTCGCCCCTATTGTCGCTCCCTTAGT +>92 +ATCAGGGCAAATTTGCTCGTGACTAAATGGTAATACTACCCGGGACAGTAAACTTTTGG +>56 +AGATCTGCTTGGGTGTATCCCCATTCAGAGATACCAGATCTAAGCGACCATCAGAAACA +>61 +TATTCGAATGCACACCCTAACATACTGGAAGATTCACTCTATATACCGGGAACTACTAA +>96 +ATTAGACCGCTATCAACTCTTGCGAGGAAGGTCTGGGCCTATTAGTGTGACTAGCCGAG +>31 +CTCCTCTTGTCATTCAGAAGGAATTTGATTAATTACCTGGGCTGACTCGCGCCCCCTGC +>29 
+TGGAAGCGCCCTCCGCTCAGGCGTTTTAGTAGATCCCAGTGTTATGGTCCAGGATGAAT +>54 +TGGATGAGGTCCTTAAGGCCTAATTGACCAATCGCCCCAAGATTGGTGGTGAATGACTC +>0 +TAGTGATCAGCGGCTAGTGTCGCCCCTCTTAGCACCTTGCGATCATCGAATCGGGCTGT +>90 +GAACCCCGAGAAATTGGGAAGCCTGGAGGCAGTACAGTCATCCAGTCTGCTGCTCAAAG +>34 +TCAATAAGCTGCACATGCGTGGTTGTGGCACGATCAGTTCCGCAGGCTGGATTCTAGAG +>43 +AGGACTCGACGTCCGCCCCATGCTTGAGAGAAGGTTTCGGCCAACCATGGTAGGTTAGG +>8 +ACACACAAGGCCAGACACCAACTTGGCCGTGGAATTTATCAACACTTCTGAGACGAAGG +>37 +TGTGCGCTGTGAGATACAACTATAGGCACCGGGTTGCTGGCTAATAACCATTTAGAGTC +>51 +ACACAATGGACGCGTTAAGGAGAACCGGTCGCAACCAGGTTGAAAATGCCTGATATACG +>32 +GCTGACTCGCGCCCCCTGCAGGCTGCTATGATTGAGTGCGGCAGCGGCTTTGAATGCCG +>78 +TCTGGGGCGAGATCCCCTCTGCTCACTTTCTTGTAGTAAATACACCGAAGGGGCGAACC +>18 +CGACTGCGTCCTGTGCTCAGTTCGTGACGCCGAACTCAAGGACGCGGTACGAAGAACTG +>36 +TTGCATACGTTGAGCCAGCGCCGCCCGTATACACAGGGTCTGTGCGCTGTGAGATACAA +>53 +ATATAAGTTTTTTAGATGTAAAAAATTTTTTATGGCGGCCTGGATGAGGTCCTTAAGGC +>24 +AAGAAACAGGCTAGGTCTTCCATGCAATGGTTCTCACAGTGTAGTCGCGCATCAACTCC +>7 +AAACGTCTAAGTAATCATGCGACCGGCGCCTCGATTGGACACACACAAGGCCAGACACC +>9 +AACACTTCTGAGACGAAGGTCATTTACGATTGGGACACTTTCTCGAACTCCGGTTAATT +>47 +CTGGGTAGGGTAACTATGTAGCCATCGCTCAGTGGATTCTTCCGGGATAGGGTGTGCGA +>62 +ATATACCGGGAACTACTAAAATTTTGGGCTACTCTATGCTTACAGCCCAACATGCGCAA +>79 +TACACCGAAGGGGCGAACCCTGTCTACATTCGCAAATGCATCCTACCTGAGAGGCTTCG +>48 +TCCGGGATAGGGTGTGCGAATGTGCCGGGCATTCAGCTCCTTAGAGACGAGTTACGAGC +>66 +GGCGCGACCAATATTCATTTGATGAGAATTGAAATCGACTGAATCACGGGATTTATACA +>25 +GTAGTCGCGCATCAACTCCGCCAGTTTTATCGAAGCGCCCAACATCTTAACCTCTGATC +>5 +TCATTACGGGGTGTCCATCTAGAGAAAGTGGGTTTCCCTTATAGAAATGAGGAGGATTC +>72 +ATAAAAAACGACTTCTAAAGCGACACTGGTTTTATCCTTCCCTGTTTTCCTCGCCCCAT +>76 +CTTACCTATGGTCGGCACGATTCCATTGGCGGATATAGGATTGATTACGTGTGTTTACT +>69 +GCAGCGAGGTATTTAAACTGTTCAATCGGCGCAACCGAAAATCTGCTACCGTGGTTGCT +>87 +CAGTATACGCCCGTTGAGAAACAGGTGGTGGCGCAGTGTCGATTACTTCGTAATAATTT +>27 +TTCAAAGTAACGATGCGCAGATTGAATAATGCCATATCTGCGCGAGAGGTTTCAGGTAC +>77 +TTGATTACGTGTGTTTACTATACCGGTAGAAGCCTTCAGTTCTGGGGCGAGATCCCCTC +>95 
+TACGTGTGGCATCGTTGCACCCTAATTCGCATTATTAAGTATTAGACCGCTATCAACTC +>63 +TACAGCCCAACATGCGCAACAACTATAAGCTGCTGCTGACAGATCCGTTTGTTCCGGAC +>38 +CTAATAACCATTTAGAGTCGCCCGCGGTGATGAGTAATCGCAATTGACTTCCATGTGGG +>20 +GTGCCTACCGTACCTGTCGAGCCAGTGCGATCAGTAAAACTACCGATTCGTGGCCTCCC +>88 +GATTACTTCGTAATAATTTGAGGGTGCTGCCGCGTGTTCCGCGAGATACTAGCAAAGGT +>49 +TTAGAGACGAGTTACGAGCCACTCTTGGATCGTCATGCATACCTCGCAGATCGGCAGAG +>91 +TCCAGTCTGCTGCTCAAAGTCCATCTACATGTAAAGAACCATCAGGGCAAATTTGCTCG +>86 +CTGAGGGCTCTGTCCGTACGTGTACTATAGATCCTTGCTCCAGTATACGCCCGTTGAGA +>42 +CATATTTCAGGCGTGCGCCAACTTACGATTCTTGAATCCAAGGACTCGACGTCCGCCCC +>70 +ATCTGCTACCGTGGTTGCTTCGACCATGGTAAACTGAGTAAGCCCTTATGAGTTGCGGG +>19 +GACGCGGTACGAAGAACTGCTCCAGCAACAGCATTCCTTGGTGCCTACCGTACCTGTCG +>84 +AGGCTAAAACGATACGTTGTATACTAAGAACTGTCTACATCGTGATATGATTACTAAAG +>52 +TGAAAATGCCTGATATACGAAGATTAAGCGGCTTTGGATCATATAAGTTTTTTAGATGT +>71 +AGCCCTTATGAGTTGCGGGTCGTGCTGTTAGACTGAACACATAAAAAACGACTTCTAAA +>93 +CGGGACAGTAAACTTTTGGTGATGCCAGCACGACCAGCGCAGGGTCAAGAAAACTATTA +>58 +TCGTGGTACACCCGGAGTCTCGAAAGGAGCTTGCAAAGCTTTTCAGCATGGGTCGCATT +>22 +TTCATTCCCCTGTAACGTTTCGAACTCAACTTGCTTGCCCGACATATGGCGGTACGCGG +>50 +ACCTCGCAGATCGGCAGAGAACGGTTTGGTCTGTTTGCGTACACAATGGACGCGTTAAG +>21 +TACCGATTCGTGGCCTCCCGTTCGTCGCAATGAACGGCTTTTCATTCCCCTGTAACGTT +>73 +CCTGTTTTCCTCGCCCCATGCAATGGTAACTAATATACCGCCCCATAGTCTTAATAACC +>68 +CTGTCCCAACGGTAACAATGGAGGCACTATACCGACGCTCGCAGCGAGGTATTTAAACT +>23 +GACATATGGCGGTACGCGGGCTCAGCGCTCCGCCAGTAAGAAGAAACAGGCTAGGTCTT +>94 +AGGGTCAAGAAAACTATTAATTTAAGCGCTGTTTAGTAACTACGTGTGGCATCGTTGCA +>10 +TCTCGAACTCCGGTTAATTTGCAATCCGGGGGTTTGCTCAGCAGCAGACCAACATCCAA +>41 +GGTGATTATGGGGGACGGGTATAGTACTAATAGTTTTGGGCATATTTCAGGCGTGCGCC +>80 +TCCTACCTGAGAGGCTTCGACTAAAGAATGCGGGTATACTGGCTACTGGTTGATAAGCG +>64 +AGATCCGTTTGTTCCGGACGGTCGTCGTACCCACCCCTTGTCGATAGGTAAAGGAGTAA +>57 +TAAGCGACCATCAGAAACACAGCATCAGCTTACCAGCCTTTCGTGGTACACCCGGAGTC +>1 +GATCATCGAATCGGGCTGTCGCCAAAGGCCGACCAAGGTTCCCGTGGGGCGGGCTAATT +>55 +GATTGGTGGTGAATGACTCACAAAATGCTCATAGAATATTAGATCTGCTTGGGTGTATC +>67 
+GAATCACGGGATTTATACATCATTTATAGCTAAATTACACCTGTCCCAACGGTAACAAT +>14 +CTATTGTCGCTCCCTTAGTTGTTGGGCGTAGTCCGCACCTAGAGTCCAACCAGGCCTCG +>15 +AGAGTCCAACCAGGCCTCGACAATCCTTTGTCCTGTCCCCCGGAAGCCCAATGAGTTGT +>59 +TTTCAGCATGGGTCGCATTCCTACCTAAGGCTAGGGGCATGTTTTTGTCATCGTGCATA +>28 +CGCGAGAGGTTTCAGGTACCTATCGGGACAGACTTGTTTCTGGAAGCGCCCTCCGCTCA +>74 +CCCCATAGTCTTAATAACCGACACCGAGACGCTACATGGCGTCGGTGCCCGCGTGCGGA +>4 +TGTAACCTGTGTGGGGTCGGTCCTGGGGAAACTTTGGGTTTCATTACGGGGTGTCCATC +>65 +TCGATAGGTAAAGGAGTAAGCGTCCGACTCCCTCTTACTTGGCGCGACCAATATTCATT +>6 +ATAGAAATGAGGAGGATTCACAGACACGTCAGTCACCATCAAACGTCTAAGTAATCATG +>44 +CCAACCATGGTAGGTTAGGAAAGCCGCCAAATAAGTTCTTATACGCCACTCGACTTGGC +>3 +TCAACTCTACCCAAGCTCCTTGCATCTCGGTACCCCCCCTTGTAACCTGTGTGGGGTCG
--- a/test-data/test-abund-read-2.ct.info Mon Aug 18 07:02:05 2014 -0400 +++ b/test-data/test-abund-read-2.ct.info Tue Jul 07 11:59:39 2015 -0400 @@ -1,2 +1,3 @@ -through end: test-abund-read-2.fa +through test-data/test-abund-read-2.fa fp rate estimated to be 0.000 +
--- a/test-data/test-abund-read-2.nobigcount.ct.info Mon Aug 18 07:02:05 2014 -0400 +++ b/test-data/test-abund-read-2.nobigcount.ct.info Tue Jul 07 11:59:39 2015 -0400 @@ -1,2 +1,3 @@ -through end: test-abund-read-2.fa +through test-data/test-abund-read-2.fa fp rate estimated to be 0.000 +
--- a/test-data/test-abund-read-paired.fa Mon Aug 18 07:02:05 2014 -0400 +++ b/test-data/test-abund-read-paired.fa Tue Jul 07 11:59:39 2015 -0400 @@ -2,11 +2,11 @@ GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG >895:1:37:17593:9954/2 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG +>895:1:37:17593:9954 1::FOO +GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG +>895:1:37:17593:9954 2::FOO +GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG >895:1:37:17593:9954/1 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG >895:1:37:17593:9954/2 GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG ->895:1:37:17593:9954/1 -GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG ->895:1:37:17593:9954/2 -GGTTGACGGGGCTCAGGGGGCGGCTGACTCCGAGAGACAGCAGCCGCAGCTGTCGTCAGGGGATTTCCGGGGCGGAGGCCGCAGACGCGAGTGGTGGAGGGAGAAGGCCTGACG
--- a/tool_dependencies.xml Mon Aug 18 07:02:05 2014 -0400 +++ b/tool_dependencies.xml Tue Jul 07 11:59:39 2015 -0400 @@ -1,9 +1,14 @@ <?xml version="1.0"?> <tool_dependency> - <package name="khmer" version="1.1"> + <package name="khmer" version="2.0rc1"> <install version="1.0"> - <actions> - <action type="shell_command">easy_install -U setuptools==3.4.1; pip install --user khmer==1.1 || pip install khmer==1.1</action> + <actions> + <action type="setup_python_environment"> + <repository changeset_revision="44e7542c51e6" name="package_python_2_7" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu"> + <package name="python" version="2.7" /> + </repository> + <package>https://pypi.python.org/packages/source/k/khmer/khmer-2.0rc1.tar.gz#md5=d8ea5e3ba34de0380007c74d61fc6d1a</package> + </action> </actions> </install> </package>