Repository 'khmer'
hg clone https://testtoolshed.g2.bx.psu.edu/repos/crusoe/khmer

Changeset 10:d00e18a0a3f2 (2014-06-29)
Previous changeset 9:50e354731c85 (2014-06-28) Next changeset 11:cec78b574760 (2014-06-29)
Commit message:
More scripts
modified:
tool_dependencies.xml
added:
abundance-dist-single.xml
abundance-dist.xml
count-median.xml
filter-abund.xml
macros.xml
b
diff -r 50e354731c85 -r d00e18a0a3f2 abundance-dist-single.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/abundance-dist-single.xml Sun Jun 29 09:22:32 2014 -0400
[
@@ -0,0 +1,95 @@
+<tool id="gedlab-khmer-abundance-dist-single"
+      name="Abundance Distribution (all-in-one)"
+      version="1.1-1"
+      force_history_refresh="true">
+    <!-- Wraps abundance-dist-single.py. The id is specific to this wrapper;
+         reusing another tool's id would make the two wrappers collide. -->
+
+    <description>
+        Calculate abundance distribution of the k-mers in a given sequence
+        file.
+    </description>
+    <!-- macros.xml supplies the "requirements", "tableinputs", "input_sequence_filename"
+         and "abundance-histogram-output" macros that are expanded below. -->
+    <macros>
+        <token name="@BINARY@">abundance-dist-single.py</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+mkdir output; cd output;
+@BINARY@
+@TABLEPARAMS@
+$no_zero
+$no_bigcount
+#if $save_countingtable
+--savetable=$countingtable_to_save
+#end if
+--report-total-kmers
+$input_sequence_filename
+$output_histogram_filename
+    </command>
+
+    <inputs>
+        <expand macro="input_sequence_filename" />
+        <!-- Each checkbox expands to its command line flag when ticked and to
+             an empty string otherwise, so the command can use it directly. -->
+        <param name="no_zero"
+               type="boolean"
+               checked="false"
+               truevalue="--no-zero"
+               falsevalue=""
+               label="Do not output zero-count bins"
+               help="" />
+        <param name="no_bigcount"
+               type="boolean"
+               checked="false"
+               truevalue="--no-bigcount"
+               falsevalue=""
+               label="Do not count k-mers past 255 occurrences"
+               help="" />
+        <param name="save_countingtable"
+               type="boolean"
+               label="Save the k-mer counting table(s) in a file"
+               help="" />
+        <expand macro="tableinputs" />
+    </inputs>
+    <outputs>
+        <!-- Target of the savetable option; only created when the
+             save_countingtable checkbox is ticked. -->
+        <data name="countingtable_to_save"
+              format="data"
+              label="${tool.name} k-mer counting table">
+            <filter>save_countingtable == True</filter>
+        </data>
+        <expand macro="abundance-histogram-output" />
+    </outputs>
+    <stdio>
+        <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
+        <exit_code range="1:"
+                   level="fatal" />
+    </stdio>
+
+    <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->
+    <tests>
+        <!-- [HELP] Test files have to be in the ~/test-data directory
+        <test>
+            <param name="input" value="input_test.tab" />
+            <param name="int"   value="10" />
+            <output name="output" file="output_test.txt" />
+        </test> -->
+        <!-- [HELP] Multiple tests can be defined with different parameters -->
+<!--
+        <test>
+        </test>
+-->
+    </tests>
+
+    <!-- [OPTIONAL] Help displayed in Galaxy -->
+    <!--
+    <help>
+    </help>
+    -->
+</tool>
b
diff -r 50e354731c85 -r d00e18a0a3f2 abundance-dist.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/abundance-dist.xml Sun Jun 29 09:22:32 2014 -0400
[
@@ -0,0 +1,57 @@
+<tool id="gedlab-khmer-abundance-dist"
+      name="Abundance Dist"
+      version="1.1-1" force_history_refresh="true">
+    <!-- Wraps abundance-dist.py, the variant that reads an existing counting
+         table, so both the tool id and the binary token name that script. -->
+    <description>
+        Calculate abundance distribution of the k-mers in a given sequence
+        file using a pre-made k-mer counting table.
+    </description>
+    <macros>
+        <token name="@BINARY@">abundance-dist.py</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+mkdir output; cd output;
+@BINARY@
+ $input_counting_table_filename
+ $input_sequence_filename
+ $output_histogram_filename
+    </command>
+
+    <inputs>
+        <expand macro="input_sequence_filename" />
+        <expand macro="input_counting_table_filename" />
+    </inputs>
+    <outputs>
+        <expand macro="abundance-histogram-output" />
+    </outputs>
+    <stdio>
+        <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+
+    <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->
+    <tests>
+        <!-- [HELP] Test files have to be in the ~/test-data directory
+        <test>
+            <param name="input" value="input_test.tab" />
+            <param name="int"   value="10" />
+            <output name="output" file="output_test.txt" />
+        </test> -->
+        <!-- [HELP] Multiple tests can be defined with different parameters -->
+<!--
+        <test>
+        </test>
+-->
+    </tests>
+
+    <!-- [OPTIONAL] Help displayed in Galaxy -->
+    <!--
+    <help>
+    </help>
+    -->
+</tool>
b
diff -r 50e354731c85 -r d00e18a0a3f2 count-median.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/count-median.xml Sun Jun 29 09:22:32 2014 -0400
[
@@ -0,0 +1,58 @@
+<tool id="gedlab-khmer-count-median"
+      name="Count Median"
+      version="1.1-1" force_history_refresh="true">
+    <!-- Wraps count-median.py; the id names this wrapper so it does not
+         collide with the other khmer tools that use the same template. -->
+    <description>
+        Count the median/avg k-mer abundance for each sequence in the input file,
+        based on the k-mer counts in the given k-mer counting table. Can be used to
+        estimate expression levels (mRNAseq) or coverage (genomic/metagenomic).
+    </description>
+    <macros>
+        <token name="@BINARY@">count-median.py</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+mkdir output; cd output;
+@BINARY@
+ $input_counting_table_filename
+ $input_sequence_filename
+ $output_summary_filename
+    </command>
+
+    <inputs>
+        <expand macro="input_sequence_filename" />
+        <expand macro="input_counting_table_filename" />
+    </inputs>
+    <outputs>
+        <data name="output_summary_filename" format="txt" label="${input_sequence_filename} sequence id, median, average, stddev, and seq length" />
+    </outputs>
+    <stdio>
+        <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+
+    <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->
+    <tests>
+        <!-- [HELP] Test files have to be in the ~/test-data directory
+        <test>
+            <param name="input" value="input_test.tab" />
+            <param name="int"   value="10" />
+            <output name="output" file="output_test.txt" />
+        </test> -->
+        <!-- [HELP] Multiple tests can be defined with different parameters -->
+<!--
+        <test>
+        </test>
+-->
+    </tests>
+
+    <!-- [OPTIONAL] Help displayed in Galaxy -->
+    <!--
+    <help>
+    </help>
+    -->
+</tool>
b
diff -r 50e354731c85 -r d00e18a0a3f2 filter-abund.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/filter-abund.xml Sun Jun 29 09:22:32 2014 -0400
[
@@ -0,0 +1,88 @@
+<tool id="gedlab-khmer-filter-abund"
+      name="Filter by abundance"
+      version="1.1-1"
+      force_history_refresh="true">
+
+    <description>
+        Trims fastq/fasta sequences at k-mers of a given abundance
+        based on a provided k-mer counting table.
+    </description>
+
+    <!-- Declares the khmer 1.1 package as this tool's only requirement. -->
+    <requirements>
+        <requirement type="package" version="1.1">khmer</requirement>
+    </requirements>
+
+    <version_command>filter-abund.py --version</version_command>
+
+    <!-- Runs inside a fresh "output" directory, the same directory the
+         discover_datasets rule in the outputs section scans. -->
+    <command>
+ mkdir output; cd output;
+ filter-abund.py
+ $variable_coverage
+ $presencetable_to_load
+ #for input in $inputs
+ $input
+ #end for
+    </command>
+
+    <inputs>
+        <!-- One or more sequence files; each is passed as a positional argument. -->
+        <param name="inputs"
+               type="data"
+               multiple="true"
+               format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina"
+               label="FAST[AQ] file(s)"
+               help="Put in order of precedence such as longest reads first." />
+
+        <!-- Checkbox that becomes the flag below when ticked, nothing otherwise. -->
+        <param name="variable_coverage"
+               type="boolean"
+               truevalue="--variable-coverage"
+               falsevalue=""
+               checked="false"
+               label="Variable coverage"
+               help="Only trim when a sequence has high enough coverage (median abundance > 20)" />
+
+        <!-- Required counting table, placed before the sequence files. -->
+        <param name="presencetable_to_load"
+               type="data"
+               optional="false"
+               label="the khmer abundance table to load"
+               help="The inputs file(s) will be processed using the kmer counts in the specified k-mer counting table file." />
+    </inputs>
+    <outputs>
+        <!-- Datasets are discovered from files in the "output" directory. -->
+        <data name="output"
+              label="${tool.name} processed nucleotide sequence file"
+              format="input">
+            <discover_datasets directory="output" pattern="__name__" visible="true" />
+        </data>
+    </outputs>
+    <stdio>
+        <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+
+    <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->
+    <tests>
+        <!-- [HELP] Test files have to be in the ~/test-data directory
+        <test>
+            <param name="input" value="input_test.tab" />
+            <param name="int"   value="10" />
+            <output name="output" file="output_test.txt" />
+        </test> -->
+        <!-- [HELP] Multiple tests can be defined with different parameters -->
+<!--
+        <test>
+        </test>
+-->
+    </tests>
+
+    <!-- [OPTIONAL] Help displayed in Galaxy -->
+    <!--
+    <help>
+    </help>
+    -->
+</tool>
b
diff -r 50e354731c85 -r d00e18a0a3f2 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Sun Jun 29 09:22:32 2014 -0400
[
@@ -0,0 +1,92 @@
+<macros>
+    <!-- Shared by the khmer tool wrappers: package requirement plus a version
+         command built from the @BINARY@ token each wrapper defines. -->
+    <xml name="requirements">
+        <requirements>
+            <!-- <requirement type="binary">@BINARY@</requirement> -->
+            <requirement type="package" version="1.1">khmer</requirement>
+        </requirements>
+        <version_command>@BINARY@ --version</version_command>
+    </xml>
+    <!-- Table-size flags for the command line. The token name includes the
+         "@" delimiters so it matches the literal text wrappers write, and every
+         $parameters.* reference names a param of the "tableinputs" macro. -->
+    <token name="@TABLEPARAMS@">#if $parameters.type == "simple"
+--ksize=20
+--n_tables=4
+--min-tablesize=$parameters.tablesize
+#else
+--ksize=$parameters.ksize
+--n_tables=$parameters.n_tables
+--min-tablesize=$parameters.tablesize_specific
+#end if</token>
+    <!-- Inputs read by the table-size token: a preset table size per sample
+         type, or explicit ksize / n_tables / tablesize values. The param
+         names must stay in sync with the references in that token. -->
+    <xml name="tableinputs">
+        <conditional name="parameters">
+            <param name="type"
+                   type="select"
+                   label="Advanced Parameters"
+                   help="ksize, n_tables, a specific tablesize" >
+                <option value="simple" selected="true">Hide</option>
+                <option value="specific">Show</option>
+            </param>
+            <when value="simple">
+                <param name="tablesize"
+                       type="select"
+                       label="Sample Type"
+                       display="radio">
+                    <option value="1e9" selected="true">Microbial Genome</option>
+                    <option value="2e9">Animal Transcriptome</option>
+                    <option value="4e9">Small Animal Genome or Low-Diversity Metagenome</option>
+                    <option value="16e9">Large Animal Genome</option>
+                </param>
+            </when>
+            <when value="specific">
+                <param name="ksize"
+                       type="integer"
+                       value="20"
+                       label="ksize"
+                       help="k-mer size to use" />
+                <param name="n_tables"
+                       type="integer"
+                       min="1"
+                       value="4"
+                       label="n_tables"
+                       help="number of hash tables to use" />
+                <param name="tablesize_specific"
+                       type="text"
+                       label="tablesize"
+                       help="lower bound on the tablesize to use" />
+            </when>
+        </conditional>
+    </xml>
+    <!-- Single sequence file input; wrappers reference it in their command
+         as $input_sequence_filename. -->
+    <xml name="input_sequence_filename">
+        <param name="input_sequence_filename"
+               type="data"
+               format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina"
+               label="FAST[AQ] file(s)" />
+    </xml>
+    <!-- Existing k-mer counting table input; wrappers reference it in their
+         command as $input_counting_table_filename. -->
+    <xml name="input_counting_table_filename">
+        <param name="input_counting_table_filename"
+               type="data"
+               label="the k-mer counting table to query"
+               help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer counting table." />
+    </xml>
+    <!-- Histogram output, referenced as $output_histogram_filename; "txt" is
+         the extension of Galaxy's plain text datatype. -->
+    <xml name="abundance-histogram-output">
+        <data name="output_histogram_filename"
+              format="txt"
+              label="${tool.name} k-mer abundance histogram. The
+                        columns are: (1) k-mer abundance, (2) k-mer count, (3)
+                        cumulative count, (4) fraction of total distinct
+                        k-mers.">
+        </data>
+    </xml>
+</macros>
b
diff -r 50e354731c85 -r d00e18a0a3f2 tool_dependencies.xml
--- a/tool_dependencies.xml Sat Jun 28 17:25:11 2014 -0400
+++ b/tool_dependencies.xml Sun Jun 29 09:22:32 2014 -0400
b
@@ -3,7 +3,7 @@
     <package name="khmer" version="1.1">
         <install version="1.0">
             <actions>
-                <action type="shell_command">easy_install -U setuptools; pip install --user khmer==1.1 || pip install khmer==1.1</action>
+                <action type="shell_command">easy_install -U setuptools==3.4.1; pip install --user khmer==1.1 || pip install khmer==1.1</action>
             </actions>
         </install>
     </package>