changeset 10:d00e18a0a3f2

More scripts
author Michael R. Crusoe <mcrusoe@msu.edu>
date Sun, 29 Jun 2014 09:22:32 -0400
parents 50e354731c85
children cec78b574760
files abundance-dist-single.xml abundance-dist.xml count-median.xml filter-abund.xml macros.xml tool_dependencies.xml
diffstat 6 files changed, 363 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/abundance-dist-single.xml	Sun Jun 29 09:22:32 2014 -0400
@@ -0,0 +1,67 @@
+<tool id="gedlab-khmer-abundance-dist-single"
+      name="Abundance Distribution (all-in-one)"
+      version="1.1-1"
+      force_history_refresh="true">
+
+    <description>
+        Calculate abundance distribution of the k-mers in a given sequence
+        file.
+    </description>
+    <macros>
+        <!-- macros.xml supplies the requirements, table parameter and input/output macros. -->
+        <import>macros.xml</import>
+        <token name="@BINARY@">abundance-dist-single.py</token>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+mkdir output; cd output;
+@BINARY@
+@TABLEPARAMS@
+$no_zero
+$no_bigcount
+#if $save_countingtable
+--savetable=$countingtable_to_save
+#end if
+--report-total-kmers
+$input_sequence_filename
+$output_histogram_filename
+    </command>
+
+    <inputs>
+        <expand macro="input_sequence_filename" />
+        <!-- no_zero and no_bigcount expand to their flag when checked and to nothing otherwise. -->
+        <param name="no_zero" type="boolean" checked="false"
+               truevalue="--no-zero" falsevalue="" label="Do not output 0-count bins" />
+        <param name="no_bigcount" type="boolean" checked="false"
+               truevalue="--no-bigcount" falsevalue="" label="Do not count k-mers past 255" />
+        <param name="save_countingtable" type="boolean"
+               label="Save the k-mer counting table(s) in a file" help="" />
+        <expand macro="tableinputs" />
+    </inputs>
+    <outputs>
+        <!-- Target of the savetable option; only created when save_countingtable is checked. -->
+        <data name="countingtable_to_save" format="data" label="${tool.name} k-mer counting table">
+            <filter>save_countingtable == True</filter>
+        </data>
+        <expand macro="abundance-histogram-output" />
+    </outputs>
+    <stdio>
+        <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+
+    <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->
+    <tests>
+        <!-- [HELP] Test files have to be in the ~/test-data directory
+        <test>
+            <param name="input" value="input_test.tab" />
+            <param name="int"   value="10" />
+            <output name="output" file="output_test.txt" />
+        </test> -->
+    </tests>
+
+    <!-- [OPTIONAL] Help displayed in Galaxy -->
+    <!-- <help></help> -->
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/abundance-dist.xml	Sun Jun 29 09:22:32 2014 -0400
@@ -0,0 +1,57 @@
+<tool id="gedlab-khmer-abundance-dist"
+      name="Abundance Dist"
+      version="1.1-1"
+      force_history_refresh="true">
+
+    <description>
+        Calculate abundance distribution of the k-mers in a given sequence
+        file using a pre-made k-mer counting table.
+    </description>
+    <macros>
+        <!-- macros.xml supplies the requirements and input/output macros expanded below. -->
+        <import>macros.xml</import>
+        <token name="@BINARY@">abundance-dist.py</token>
+    </macros>
+    <!-- Adds the khmer package requirement and the version command for @BINARY@. -->
+    <expand macro="requirements" />
+    <command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+mkdir output; cd output;
+@BINARY@
+$input_counting_table_filename
+$input_sequence_filename
+$output_histogram_filename
+    </command>
+
+    <inputs>
+        <expand macro="input_sequence_filename" />
+        <expand macro="input_counting_table_filename" />
+    </inputs>
+    <outputs>
+        <expand macro="abundance-histogram-output" />
+    </outputs>
+    <stdio>
+        <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+
+    <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->
+    <tests>
+        <!-- [HELP] Test files have to be in the ~/test-data directory
+        <test>
+            <param name="input" value="input_test.tab" />
+            <param name="int"   value="10" />
+            <output name="output" file="output_test.txt" />
+        </test> -->
+        <!-- [HELP] Multiple tests can be defined with different parameters -->
+        <!--
+        <test>
+        </test>
+        -->
+    </tests>
+
+    <!-- [OPTIONAL] Help displayed in Galaxy -->
+    <!-- <help></help> -->
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/count-median.xml	Sun Jun 29 09:22:32 2014 -0400
@@ -0,0 +1,58 @@
+<tool id="gedlab-khmer-count-median"
+      name="Count Median"
+      version="1.1-1"
+      force_history_refresh="true">
+
+    <description>
+        Count the median/avg k-mer abundance for each sequence in the input file,
+        based on the k-mer counts in the given k-mer counting table. Can be used to
+        estimate expression levels (mRNAseq) or coverage (genomic/metagenomic).
+    </description>
+    <macros>
+        <!-- macros.xml supplies the requirements and input macros expanded below. -->
+        <import>macros.xml</import>
+        <token name="@BINARY@">count-median.py</token>
+    </macros>
+    <expand macro="requirements" />
+    <command>
+## The command is a Cheetah template which allows some Python based syntax.
+## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
+mkdir output; cd output;
+@BINARY@
+$input_counting_table_filename
+$input_sequence_filename
+$output_summary_filename
+    </command>
+
+    <inputs>
+        <expand macro="input_sequence_filename" />
+        <expand macro="input_counting_table_filename" />
+    </inputs>
+    <outputs>
+        <data name="output_summary_filename"
+              format="txt"
+              label="${input_sequence_filename.name} sequence id, median, average, stddev, and seq length" />
+    </outputs>
+    <stdio>
+        <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+
+    <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->
+    <tests>
+        <!-- [HELP] Test files have to be in the ~/test-data directory
+        <test>
+            <param name="input" value="input_test.tab" />
+            <param name="int"   value="10" />
+            <output name="output" file="output_test.txt" />
+        </test> -->
+        <!-- [HELP] Multiple tests can be defined with different parameters -->
+        <!--
+        <test>
+        </test>
+        -->
+    </tests>
+
+    <!-- [OPTIONAL] Help displayed in Galaxy -->
+    <!-- <help></help> -->
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filter-abund.xml	Sun Jun 29 09:22:32 2014 -0400
@@ -0,0 +1,88 @@
+<tool id="gedlab-khmer-filter-abund"
+      name="Filter by abundance"
+      version="1.1-1"
+      force_history_refresh="true">
+
+    <description>
+        Trims fastq/fasta sequences at k-mers of a given abundance
+        based on a provided k-mer counting table.
+    </description>
+
+    <!--
+        The khmer package requirement and the version command come from the
+        "requirements" macro in macros.xml, as in the other khmer tools;
+        the @BINARY@ token names the script this wrapper runs.
+    -->
+    <macros>
+        <import>macros.xml</import>
+        <token name="@BINARY@">filter-abund.py</token>
+    </macros>
+    <expand macro="requirements" />
+
+    <command>
+        ## Runs inside ./output: the counting table is passed first, then each input file.
+        mkdir output; cd output;
+        @BINARY@
+        $variable_coverage
+        $presencetable_to_load
+        #for input in $inputs
+            $input
+        #end for
+    </command>
+
+    <inputs>
+        <param name="inputs"
+               multiple="true"
+               type="data"
+               format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina"
+               label="FAST[AQ] file(s)"
+               help="Put in order of precedence such as longest reads first." />
+
+        <param name="variable_coverage"
+               type="boolean"
+               checked="false"
+               truevalue="--variable-coverage"
+               falsevalue=""
+               label="Variable coverage"
+               help="Only trim when a sequence has high enough coverage (median abundance > 20)" />
+
+        <param name="presencetable_to_load"
+               type="data"
+               optional="false"
+               label="the khmer abundance table to load"
+               help="The inputs file(s) will be processed using the kmer counts in the specified k-mer counting table file." />
+    </inputs>
+    <outputs>
+        <!-- discover_datasets collects the files the script leaves in the output directory. -->
+        <data name="output"
+              format="input"
+              label="${tool.name} processed nucleotide sequence file">
+            <discover_datasets pattern="__name__" directory="output" visible="true"/>
+        </data>
+    </outputs>
+    <stdio>
+        <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+
+    <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->
+    <tests>
+        <!-- [HELP] Test files have to be in the ~/test-data directory
+        <test>
+            <param name="input" value="input_test.tab" />
+            <param name="int"   value="10" />
+            <output name="output" file="output_test.txt" />
+        </test> -->
+        <!-- [HELP] Multiple tests can be defined with different parameters -->
+        <!--
+        <test>
+        </test>
+        -->
+    </tests>
+
+    <!-- [OPTIONAL] Help displayed in Galaxy -->
+    <!--
+    <help>
+    </help>
+    -->
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Sun Jun 29 09:22:32 2014 -0400
@@ -0,0 +1,92 @@
+<macros>
+    <!-- khmer package requirement plus a version command; every including tool
+         must define the @BINARY@ token with the name of its script. -->
+    <xml name="requirements">
+        <requirements>
+            <!-- <requirement type="binary">@BINARY@</requirement> -->
+            <requirement type="package" version="1.1">khmer</requirement>
+        </requirements>
+        <version_command>@BINARY@ --version</version_command>
+    </xml>
+    <!-- Table-size options for the command line. The $parameters.* names refer to
+         the params of the "tableinputs" macro and must stay in sync with them. -->
+    <token name="@TABLEPARAMS@">#if $parameters.type == "simple"
+--ksize=20
+--n_tables=4
+--min-tablesize=$parameters.tablesize
+#else
+--ksize=$parameters.ksize
+--n_tables=$parameters.n_tables
+--min-tablesize=$parameters.tablesize_specific
+#end if</token>
+    <xml name="tableinputs">
+        <conditional name="parameters">
+            <param name="type"
+                   type="select"
+                   label="Advanced Parameters"
+                   help="ksize, n_tables, a specific tablesize">
+                <option value="simple" selected="true">
+                    Hide
+                </option>
+                <option value="specific">
+                    Show
+                </option>
+            </param>
+            <when value="simple">
+                <param name="tablesize"
+                       type="select"
+                       label="Sample Type"
+                       display="radio">
+                    <option value="1e9" selected="true">
+                        Microbial Genome
+                    </option>
+                    <option value="2e9">
+                        Animal Transcriptome
+                    </option>
+                    <option value="4e9">
+                        Small Animal Genome or Low-Diversity Metagenome
+                    </option>
+                    <option value="16e9">
+                        Large Animal Genome
+                    </option>
+                </param>
+            </when>
+            <when value="specific">
+                <param name="ksize"
+                       type="integer"
+                       value="20"
+                       label="ksize"
+                       help="k-mer size to use" />
+                <param name="n_tables"
+                       type="integer"
+                       min="1"
+                       value="4"
+                       label="n_tables"
+                       help="number of hash tables to use" />
+                <param name="tablesize_specific"
+                       type="text"
+                       value="1e6"
+                       label="tablesize"
+                       help="lower bound on the tablesize to use" />
+            </when>
+        </conditional>
+    </xml>
+    <xml name="input_sequence_filename">
+        <param name="input_sequence_filename"
+               type="data"
+               format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina"
+               label="FAST[AQ] file(s)" />
+    </xml>
+    <xml name="input_counting_table_filename">
+        <param name="input_counting_table_filename"
+               type="data"
+               label="the k-mer counting table to query"
+               help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer counting table." />
+    </xml>
+    <!-- The label stays on one line so that no indentation ends up inside it. -->
+    <xml name="abundance-histogram-output">
+        <data name="output_histogram_filename"
+              format="txt"
+              label="${tool.name} k-mer abundance histogram. The columns are: (1) k-mer abundance, (2) k-mer count, (3) cumulative count, (4) fraction of total distinct k-mers." />
+    </xml>
+</macros>
--- a/tool_dependencies.xml	Sat Jun 28 17:25:11 2014 -0400
+++ b/tool_dependencies.xml	Sun Jun 29 09:22:32 2014 -0400
@@ -3,7 +3,7 @@
     <package name="khmer" version="1.1">
         <install version="1.0">
             <actions>
-                <action type="shell_command">easy_install -U setuptools; pip install --user khmer==1.1 || pip install khmer==1.1</action>
+                <action type="shell_command">easy_install -U setuptools==3.4.1; pip install --user khmer==1.1 || pip install khmer==1.1</action> <!-- Pins setuptools to 3.4.1, then installs khmer 1.1 with pip: a per-user install is tried first, with a plain install as the fallback. -->
             </actions>
         </install>
     </package>