changeset 37:17f10d28dab3

Uploaded
author yhoogstrate
date Tue, 04 Mar 2014 07:50:19 -0500
parents b578aaede79b
children 48c78adade03
files samtools-parallel-mpileup.xml tool_data_table_conf.xml.sample tool_dependencies.xml varscan_mpileup2snp.xml varscan_mpileup2snp_from_bam.xml
diffstat 5 files changed, 464 insertions(+), 26 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/samtools-parallel-mpileup.xml	Tue Mar 04 07:50:19 2014 -0500
@@ -0,0 +1,222 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<tool id="samtools_parallel_mpileup" name="Samtools parallel mpileup">
+	<description>Samtools mpileup (classical or supporting parallelization).</description>
+	<requirements>
+		<requirement type="package" version="0.1.19">samtools-parallel-mpileup</requirement>
+		<requirement type="package" version="0.1.19">samtools</requirement>
+	</requirements>
+	<command>
+		#if $reference_genome_source.source_select == "attribute" and len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) != 1
+			echo "Invalid number of dbkeys are found: ${ len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) }, while only one should be used. Make sure that the alignments are done on the same reference genome and that 'tool-data/all_fasta.loc' is configured properly!" >&amp;2
+		#else
+			#if $mpileup_parallelization.mpileup_parallelization_select == "true"
+				samtools-parallel-mpileup mpileup
+				-t $mpileup_parallelization.samtools_threads
+			#else
+				samtools mpileup
+			#end if
+				-f 
+					#if $reference_genome_source.source_select == "indexed_filtered"
+						"$reference_genome_source.reference_genome"
+					#else if $reference_genome_source.source_select == "indexed_all"
+						"$reference_genome_source.reference_genome"
+					#else if $reference_genome_source.source_select == "history"
+						"$reference_genome_source.reference_genome"
+					#else
+						<!--
+							This is a workaround to obtain the "genome.fa" file that
+							corresponds to the dbkey of the alignments.
+							Because this file is "calculated" during run-time, it can
+							be used in a workflow.
+						-->
+						"${ filter( lambda x: str( x[0] ) == str( { alignment.metadata.dbkey:True for alignment in $alignments }.keys()[0] ), $__app__.tool_data_tables[ 'all_fasta' ].get_fields() )[0][-1] }"
+					#end if
+			
+			#if $extended_parameters_regions.samtools_regions == "region"
+				-r "$extended_parameters_regions.samtools_r"
+			#elif $extended_parameters_regions.samtools_regions == "regions_file_pos" or $extended_parameters_regions.samtools_regions == "regions_file_bed"
+				-l "$extended_parameters_regions.samtools_l"
+			#end if
+			
+			#if $extended_parameters.parameters == "extended"
+				$extended_parameters.samtools_6
+				$extended_parameters.samtools_A
+				$extended_parameters.samtools_B
+				 -C $extended_parameters.samtools_C
+				 -d $extended_parameters.samtools_d
+				$extended_parameters.samtools_E
+				 -M $extended_parameters.samtools_M
+				$extended_parameters.samtools_R
+				 -q $extended_parameters.samtools_q
+				 -Q $extended_parameters.samtools_Q
+				
+				 -e $extended_parameters.samtools_e
+				 -F $extended_parameters.samtools_F
+				 -h $extended_parameters.samtools_h
+				$extended_parameters.samtools_I
+				 -L $extended_parameters.samtools_L
+				 -m $extended_parameters.samtools_m
+				 -o $extended_parameters.samtools_o
+				$extended_parameters.samtools_p
+				 -P $extended_parameters.samtools_P
+			#end if
+			
+			#for $alignment in $alignments
+				 ${alignment}
+			#end for
+			
+			 2> stderr_1.txt
+			 > $output ;
+			 cat stderr_1.txt
+		#end if
+	</command>
+	
+	<inputs>
+		<param format="bam,sam" multiple="true" name="alignments" type="data" label="Alignment file" help="Mapped reads in BAM or SAM format."/>
+		
+		<!-- Find out how to access the reference genome from the BAM file(s) -->
+		<conditional name="reference_genome_source">
+			<param name="source_select" type="select" label="Fasta Source">
+				<option value="indexed_filtered">Use a built-in index (which fits your reference)</option>
+				<option value="history">Use reference from the history</option>
+				<option value="indexed_all">Use a built-in index (entire list) - avoid this option if possible; only useful if you design a workflow</option>
+				<option value="attribute">Use a built-in index based on the 'metadata.dbkey' attribute; ideal in workflows</option>
+			</param>
+			<when value="indexed_filtered">
+				<param name="reference_genome" type="select" label="Reference Genome used during alignment (fasta)" >
+					<options from_file="all_fasta.loc">
+						<column name="name" index="0"/>
+						<column name="dbkey" index="1"/>
+						<column name="value" index="3"/>
+						<filter type="data_meta" ref="alignments" multiple="false" key="dbkey" column="1" />
+						<validator type="no_options" message="No indexes are available for the selected input dataset" />
+					</options>
+				</param>
+			</when>
+			<when value="history">
+				<param name="reference_genome" format="fasta" type="data" label="Reference Genome used during alignment (fasta)" help="Reference genome (genome.fa) that corresponds to the *.bam file." />
+			</when>
+			<when value="indexed_all">
+				<param name="reference_genome" type="select" label="Reference Genome used during alignment (fasta)" >
+					<options from_file="all_fasta.loc">
+						<column name="name" index="0"/>
+						<column name="dbkey" index="1"/>
+						<column name="value" index="3"/>
+						<validator type="no_options" message="No indexes are available for the selected input dataset" />
+					</options>
+				</param>
+			</when>
+			<when value="attribute" />
+		</conditional>
+		
+		<conditional name="extended_parameters_regions">
+			<param name="samtools_regions" type="select" label="Region specific parameters" help="Let samtools target specific genomic locations.">
+				<option value="entire_genome">Entire genome</option>
+				<option value="region">Specific region</option>
+				<option value="regions_file_pos">Specific positions (file); list of positions</option>
+				<option value="regions_file_bed">Specific regions (file); list of regions in BED</option>
+			</param>
+			<when value="entire_genome">
+			</when>
+			<when value="region">
+				<param type="text" name="samtools_r" label="Samtools: region in which pileup is generated" help="chr:pos or chr:start-end" />
+			</when>
+			<when value="regions_file_pos">
+				<param type="data" name="samtools_l" format="tabular" label="Samtools: list of positions (chr pos)" />
+			</when>
+			<when value="regions_file_bed">
+				<param type="data" name="samtools_l" format="bed"	 label="Samtools: specific regions (BED)" />
+			</when>
+		</conditional>
+		
+		<conditional name="mpileup_parallelization">
+			<param name="mpileup_parallelization_select" type="select" label="Use parallelization for the mpileup generation (experimental)" help="Especially if larger numbers of bam/sam files are processed, or the file infrastructure is optimized for IO-paralellization, this feature might improve performance.">
+				<option value="false" >False - uses classical samtools</option>
+				<option value="true">True - uses (experimental) samtools mpileup-parallel</option>
+			</param>
+			<when value="false" />
+			<when value="true">
+				<param type="integer" name="samtools_threads" value="2" min="1" label="Samtools: mpileup threads" />
+			</when>
+		</conditional>
+		
+		<conditional name="extended_parameters">
+			<param name="parameters" type="select" label="Advanced parameters" help="For more advanced VarScan and samtools settings.">
+				<option value="default">Default settings</option>
+				<option value="extended">Extended settings</option>
+			</param>
+			<when value="default">
+			</when>
+			<when value="extended">
+				<param type="boolean" name="samtools_6" falsevalue="" truevalue=" -6" label="Samtools: assume the quality is in the Illumina-1.3+ encoding" />
+				<param type="boolean" name="samtools_A" falsevalue="" truevalue=" -A" label="Samtools: count anomalous read pairs" />
+				<param type="boolean" name="samtools_B" falsevalue="" truevalue=" -B" label="Samtools: disable BAQ computation" />
+				<param type="integer" name="samtools_C" value="0"					 label="Samtools: parameter for adjusting mapQ; 0 to disable [0]" />
+				<param type="integer" name="samtools_d" value="250"				   label="Samtools: max per-BAM depth to avoid excessive memory usage [250]" />
+				<param type="boolean" name="samtools_E" falsevalue="" truevalue=" -E" label="Samtools: recalculate extended BAQ on the fly thus ignoring existing BQs" />
+				<param type="integer" name="samtools_M" value="60"					label="cap mapping quality at INT [60]" />
+				<param type="boolean" name="samtools_R" falsevalue="" truevalue=" -R" label="Samtools: ignore RG tags" />
+				<param type="integer" name="samtools_q" value="0"					 label="Samtools: skip alignments with mapQ smaller than INT [0]" />
+				<param type="integer" name="samtools_Q" value="13"					label="Samtools: skip bases with baseQ/BAQ smaller than INT [13]" />
+				
+				<param type="integer" name="samtools_e" value="20"					label="Samtools: Phred-scaled gap extension seq error probability [20]" />
+				<param type="float"   name="samtools_F" value="0.002"				 label="Samtools: minimum fraction of gapped reads for candidates [0.002]" help="Alias: -F" />
+				<param type="integer" name="samtools_h" value="100"				   label="Samtools: coefficient for homopolymer errors [100]" />
+				<param type="boolean" name="samtools_I" falsevalue="" truevalue=" -I" label="Samtools: do not perform indel calling" />
+				<param type="integer" name="samtools_L" value="250"				   label="Samtools: max per-sample depth for INDEL calling [250]" />
+				<param type="integer" name="samtools_m" value="1"					 label="Samtools: minimum gapped reads for indel candidates [1]" help="Alias: -m" />
+				<param type="integer" name="samtools_o" value="40"					label="Samtools: Phred-scaled gap open sequencing error probability [40]" />
+				<param type="boolean" name="samtools_p" falsevalue="" truevalue=" -p" label="Samtools: apply -m and -F per-sample to increase sensitivity" />
+				<param type="text"    name="samtools_P" value="all"				   label="Samtools: comma separated list of platforms for indels [all]" />
+			</when>
+		</conditional>
+		
+		<!-- 'samtools_threads' is defined inside the 'mpileup_parallelization' conditional above; the unused top-level duplicate was dropped because the command only reads $mpileup_parallelization.samtools_threads. -->
+	</inputs>
+	
+	<outputs>
+		<data format="mpileup" name="output" label="${tool.name} on ${', '.join([ str(a.hid)+': '+a.name for a in $alignments ])}" />
+	</outputs>
+	
+	<help>
+VarScan2.3.6::
+
+*VarScan2 Overview*
+
+VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most operating systems.
+http://dx.doi.org/10.1101/gr.129684.111
+http://www.ncbi.nlm.nih.gov/pubmed/19542151
+
+*VarScan* requires mpileup formatted input files, which are generally derived from BAM files. Since mpileup files can become humongous, the interim step of storing it is bypassed. Thus, in this wrapper one or multiple BAM/SAM files go in, get processed into a mpileup file and get directly linked to VarScan.
+The samtools package is not able to parallelize the mpileup generation, which makes it a very slow process.
+Other people were aware of this and have written a version that can do parallelization:
+https://github.com/mydatascience/parallel-mpileup
+
+Consequently, when a BAM file gets processed by this wrapper, it is processed by *parallel-mpileup* before it is sent to VarScan.
+
+.. _VarScan: http://varscan.sourceforge.net/
+
+**Input formats**
+
+VarScan2 accepts sequencing alignments in either SAM or BAM format (http://samtools.sourceforge.net/). The alignment files have to be linked to a reference genome by Galaxy. This is indicated under every history item with e.g.: *"database: hg19"* for a link to hg19, or *"database: ?"* if the link is missing.
+
+**Installation**
+
+Make sure your reference genomes are properly annotated in "tool-data/all_fasta.loc", and linked to the names of the reference used for alignment.
+
+**License**
+
+* VarScan2.3.6: Non-Profit Open Software License 3.0 (Non-Profit OSL 3.0)
+* parallel-mpileup: MIT License (https://github.com/mydatascience/parallel-mpileup/blob/master/samtools-0.1.19/COPYING)
+* samtools: MIT License
+
+
+**Contact**
+
+The tool wrapper has been written by Youri Hoogstrate from the Erasmus Medical Center (Rotterdam, Netherlands) on behalf of the Translational Research IT (TraIT) project:
+http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch
+
+More tools by the Translational Research IT (TraIT) project can be found in the following repository:
+http://toolshed.dtls.nl/
+</help>
+</tool>
--- a/tool_data_table_conf.xml.sample	Wed Feb 19 02:46:43 2014 -0500
+++ b/tool_data_table_conf.xml.sample	Tue Mar 04 07:50:19 2014 -0500
@@ -1,5 +1,8 @@
-<?xml version="1.0"?>
-<table name="all_fasta" comment_char="#">
-	<columns>name, dbkey, display_name, value</columns>
-	<file path="tool-data/all_fasta.loc.sample" /> 
-</table>
\ No newline at end of file
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+	<!-- Locations of all fasta files under genome directory -->
+	<table name="all_fasta" comment_char="#">
+		<columns>name, dbkey, display_name, value</columns>
+		<file path="tool-data/all_fasta.loc" /> 
+	</table>
+</tables>
\ No newline at end of file
--- a/tool_dependencies.xml	Wed Feb 19 02:46:43 2014 -0500
+++ b/tool_dependencies.xml	Tue Mar 04 07:50:19 2014 -0500
@@ -1,11 +1,12 @@
 <?xml version="1.0"?>
 <tool_dependency>
-	<package name="samtools-mpileup-parallel" version="1.0.19">
+	<!-- Optimized samtools (for threaded mpileup) -->
+	<package name="samtools-parallel-mpileup" version="0.1.19">
 		<install version="1.0">
 			<actions>
-				<action type="shell_command">svn checkout https://github.com/mydatascience/parallel-mpileup/trunk samtools-mpileup-parallel &amp;&amp; cd samtools-mpileup-parallel &amp;&amp; cd $(ls |grep samtools-) &amp;&amp; make &amp;&amp; cp samtools ../samtools-mpileup-parallel</action>
+				<action type="shell_command">svn checkout https://github.com/yhoogstrate/parallel-mpileup/trunk samtools-parallel-mpileup &amp;&amp; cd samtools-parallel-mpileup &amp;&amp; cd $(ls |grep samtools-) &amp;&amp; make &amp;&amp; cp samtools ../samtools-parallel-mpileup</action>
 				<action type="move_file">
-					<source>samtools-mpileup-parallel</source>
+					<source>samtools-parallel-mpileup</source>
 					<destination>$INSTALL_DIR/bin</destination>
 				</action>
 				<action type="set_environment">
@@ -18,6 +19,87 @@
 			Downloads and installs a modified version of samtools, able to paralellize the mpileup function.
 		</readme>
 	</package>
+	
+	<!-- Classical samtools (doesn't support threaded mpileup) -->
+    <package name="samtools" version="0.1.19">
+        <install version="1.0">
+            <actions>
+                <action type="download_by_url">http://downloads.sourceforge.net/project/samtools/samtools/0.1.19/samtools-0.1.19.tar.bz2</action>
+                <action type="shell_command">sed -i.bak 's/-lcurses/-lncurses/' Makefile</action>
+                <action type="shell_command">make</action>
+                <action type="shell_command">chmod ugo+rx misc/*.p?</action>
+                <action type="shell_command">mkdir misc/bin</action>
+                <action type="shell_command">cp -p `find misc -type f -perm -555` misc/bin/</action>
+                <action type="move_file">
+                    <source>samtools</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="move_file">
+                    <source>bcftools/bcftools</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="move_file">
+                    <source>bcftools/vcfutils.pl</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="move_directory_files">
+                    <source_directory>misc/bin</source_directory>
+                    <destination_directory>$INSTALL_DIR/bin</destination_directory>
+                </action>
+                <action type="set_environment">
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+Program: samtools (Tools for alignments in the SAM format)
+Version: 0.1.19 
+
+Usage:   samtools &lt;command&gt; [options]
+
+Command: view        SAM&lt;-&gt;BAM conversion
+         sort        sort alignment file
+         mpileup     multi-way pileup
+         depth       compute the depth
+         faidx       index/extract FASTA
+         tview       text alignment viewer
+         index       index alignment
+         idxstats    BAM index stats (r595 or later)
+         fixmate     fix mate information
+         flagstat    simple stats
+         calmd       recalculate MD/NM tags and '=' bases
+         merge       merge sorted alignments
+         rmdup       remove PCR duplicates
+         reheader    replace BAM header
+         cat         concatenate BAMs
+         targetcut   cut fosmid regions (for fosmid pool only)
+         phase       phase heterozygotes
+
+This also installs bcftools and misc utility commands:
+        bcftools
+        vcfutils.pl
+        ace2sam
+        bamcheck
+        blast2sam.pl
+        bowtie2sam.pl
+        export2sam.pl
+        interpolate_sam.pl
+        maq2sam-long
+        maq2sam-short
+        md5fa
+        md5sum-lite
+        novo2sam.pl
+        psl2sam.pl
+        sam2vcf.pl
+        samtools.pl
+        soap2sam.pl
+        varfilter.py
+        wgsim
+        wgsim_eval.pl
+        zoom2sam.pl
+        </readme>
+    </package>
+	
 	<package name="VarScan" version="2.3.6">
 		<install version="1.0">
 			<actions>
@@ -35,4 +117,4 @@
 			Downloads VarScan2.
 		</readme>
 	</package>
-</tool_dependency>
+</tool_dependency>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/varscan_mpileup2snp.xml	Tue Mar 04 07:50:19 2014 -0500
@@ -0,0 +1,103 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<tool id="varscan_mpileup2snp" name="VarScan2 Call SNPs from mpileup file(s)">
+	<description>VarScan2 SNP/SNV detection; directly reading *.mpileup file(s).</description>
+	<requirements>
+		<requirement type="package" version="2.3.6">VarScan</requirement>
+	</requirements>
+	<!-- Streams every selected mpileup dataset into VarScan mpileup2snp; extended options are read from the 'extended_parameters' conditional and VarScan's stderr is merged into the output dataset. -->
+	<command>
+		cat
+		#for $mpileup in $mpileup_input
+			 "${mpileup}"
+		#end for
+		 | java -Xmx64G
+				 -jar \$JAVA_JAR_PATH/VarScan.v2.3.6.jar
+					 mpileup2snp
+		#if $extended_parameters.parameters == "extended"
+				 --min-coverage	 $extended_parameters.varscan_min_coverage
+				 --min-reads2	   $extended_parameters.varscan_min_reads2
+				 --min-avg-qual	 $extended_parameters.varscan_min_avg_qual
+				 --min-var-freq	 $extended_parameters.varscan_min_var_freq
+				 --min-freq-for-hom $extended_parameters.varscan_min_freq_for_hom
+				 --p-value		  $extended_parameters.varscan_p_value
+				$extended_parameters.varscan_strand_filter
+				$extended_parameters.varscan_variants
+		#end if
+		 --output-vcf $varscan_output_vcf
+		 > $snv_output 2>&amp;1
+	</command>
+	
+	<inputs>
+		<param format="mpileup" multiple="true" name="mpileup_input" type="data" label="mpileup file(s)" help="Pileup data in mpileup format, e.g. generated by samtools mpileup."/>
+		
+		<conditional name="extended_parameters">
+			<param name="parameters" type="select" label="VarScan parameters" help="For more advanced VarScan settings.">
+				<option value="default">Default settings</option>
+				<option value="extended">Extended settings</option>
+			</param>
+			<when value="default" />
+			<when value="extended">
+				<!-- The truevalue/falsevalue strings of the boolean params below are inserted verbatim into the VarScan command line. -->
+				<param type="integer" name="varscan_min_coverage"	 value="8"	label="VarScan: Minimum read depth at a position to make a call [8]" />
+				<param type="integer" name="varscan_min_reads2"	   value="2"	label="VarScan: Minimum supporting reads at a position to call variants [2]" />
+				<param type="integer" name="varscan_min_avg_qual"	 value="15"   label="VarScan: Minimum base quality at a position to count a read [15]" />
+				<param type="float"   name="varscan_min_var_freq"	 value="0.01" label="VarScan: Minimum variant allele frequency threshold [0.01]" />
+				<param type="float"   name="varscan_min_freq_for_hom" value="0.75" label="VarScan: Minimum frequency to call homozygote [0.75]" />
+				<param type="float"   name="varscan_p_value"		  value="0.99" label="VarScan: Default p-value threshold for calling variants [99e-02]" />
+				<param type="boolean" name="varscan_strand_filter"	falsevalue=" --strand-filter 0" truevalue=" --strand-filter 1" checked="true" label="VarScan: Ignore variants with &gt;90% support on one strand [1]" />
+				<param type="boolean" name="varscan_variants"		 falsevalue=" --variants 0"	  truevalue=" --variants 1"					 label="VarScan: Report only variant (SNP/indel) positions [0]" />
+			</when>
+		</conditional>
+		
+		<param type="boolean" name="varscan_output_vcf" falsevalue="0" truevalue="1" label="VarScan: If set to 1, outputs in VCF format" />
+	</inputs>
+	
+	<outputs>
+		<data format="tabular" name="snv_output" label="${tool.name} on ${', '.join([ str(a.hid)+': '+a.name for a in $mpileup_input ])}">
+			<change_format> <!-- switch the datatype from tabular to VCF when 'varscan_output_vcf' is set to 1 -->
+				<when input="varscan_output_vcf" value="1" format="vcf" />
+			</change_format>
+		</data>
+	</outputs>
+	
+	<help>
+VarScan2.3.6::
+
+*VarScan2 Overview*
+
+VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most operating systems.
+http://dx.doi.org/10.1101/gr.129684.111
+http://www.ncbi.nlm.nih.gov/pubmed/19542151
+
+*VarScan* requires mpileup formatted input files, which are generally derived from BAM files. Since mpileup files can become humongous, the interim step of storing it is bypassed. Thus, in this wrapper one or multiple BAM/SAM files go in, get processed into a mpileup file and get directly linked to VarScan.
+The samtools package is not able to parallelize the mpileup generation, which makes it a very slow process.
+Other people were aware of this and have written a version that can do parallelization:
+https://github.com/mydatascience/parallel-mpileup
+
+Consequently, when a BAM file gets processed by this wrapper, it is processed by *parallel-mpileup* before it is sent to VarScan.
+
+.. _VarScan: http://varscan.sourceforge.net/
+
+**Input formats**
+
+VarScan2 accepts sequencing alignments in either SAM or BAM format (http://samtools.sourceforge.net/). The alignment files have to be linked to a reference genome by Galaxy. This is indicated under every history item with e.g.: *"database: hg19"* for a link to hg19, or *"database: ?"* if the link is missing.
+
+**Installation**
+
+Make sure your reference genomes are properly annotated in "tool-data/all_fasta.loc", and linked to the names of the reference used for alignment.
+
+**License**
+
+* VarScan2.3.6: Non-Profit Open Software License 3.0 (Non-Profit OSL 3.0)
+* parallel-mpileup: MIT License (https://github.com/mydatascience/parallel-mpileup/blob/master/samtools-0.1.19/COPYING)
+
+
+**Contact**
+
+The tool wrapper has been written by Youri Hoogstrate from the Erasmus Medical Center (Rotterdam, Netherlands) on behalf of the Translational Research IT (TraIT) project:
+http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch
+
+More tools by the Translational Research IT (TraIT) project can be found in the following repository:
+http://toolshed.dtls.nl/
+</help>
+</tool>
--- a/varscan_mpileup2snp_from_bam.xml	Wed Feb 19 02:46:43 2014 -0500
+++ b/varscan_mpileup2snp_from_bam.xml	Tue Mar 04 07:50:19 2014 -0500
@@ -2,15 +2,20 @@
 <tool id="varscan_mpileup2snp_from_bam" name="VarScan2 Call SNPs from BAM">
 	<description>VarScan2 SNP/SNV detection; directly reading *.bam file(s) &amp; using parallel mpileup generation, to avoid unncessairy I/O overhead and increase performance.</description>
 	<requirements>
-		<requirement type="package" version="1.0.19">samtools-mpileup-parallel</requirement>
+		<requirement type="package" version="0.1.19">samtools-parallel-mpileup</requirement>
 		<requirement type="package" version="2.3.6">VarScan</requirement>
+		<requirement type="package" version="0.1.19">samtools</requirement>
 	</requirements>
 	<command>
 		#if $reference_genome_source.source_select == "attribute" and len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) != 1
 			echo "Invalid number of dbkeys are found: ${ len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) }, while only one should be used. Make sure that the alignments are done on the same reference genome and that 'tool-data/all_fasta.loc' is configured properly!" >&amp;2
 		#else
-			samtools-mpileup-parallel mpileup
-				-t $samtools_threads
+			#if $mpileup_parallelization.mpileup_parallelization_select == "true"
+				samtools-parallel-mpileup mpileup
+				-t $mpileup_parallelization.samtools_threads
+			#else
+				samtools mpileup
+			#end if
 				-f 
 					#if $reference_genome_source.source_select == "indexed_filtered"
 						"$reference_genome_source.reference_genome"
@@ -60,7 +65,7 @@
 			#for $alignment in $alignments
 				 ${alignment}
 			#end for
-			 2>/dev/null
+			 2> stderr_1.txt
 			 | java
 					 -Xmx64G
 					 -jar \$JAVA_JAR_PATH/VarScan.v2.3.6.jar
@@ -80,12 +85,24 @@
 			
 			 --output-vcf $varscan_output_vcf
 			 > $snv_output
-			 2>&amp;1
+			 2> stderr_2.txt ;
+			 
+			 echo "-------------------------[ mpileup generation ]-------------------------" ;
+			 echo "" ;
+			 cat stderr_1.txt ;
+			 echo "" ;
+			 echo "" ;
+			 echo "-------------------------[ VarScan SNP detect ]-------------------------" ;
+			 echo "" ;
+			 echo "" ;
+			 cat stderr_2.txt ;
+			 echo "" ;
+			 echo "------------------------------------------------------------------------" ;
 		#end if
 	</command>
 	
 	<inputs>
-		<param format="bam,sam" multiple="true" name="alignments" type="data" label="Alignment file" help="Mapped reads in BAM or SAM format."/>
+		<param format="bam,sam" multiple="true" name="alignments" type="data" label="Alignment file(s)" help="Mapped reads in BAM or SAM format."/>
 		
 		<!-- Find out how to access the reference genome from the BAM file(s) -->
 		<conditional name="reference_genome_source">
@@ -95,6 +112,9 @@
 				<option value="indexed_all">Use a built-in index (entire list) - avoid this option if possible; only useful if you design a workflow</option>
 				<option value="attribute">Use a built-in index based on the 'metadata.dbkey' attribute; ideal in workflows</option>
 			</param>
+			<when value="history">
+				<param name="reference_genome" format="fasta" type="data" label="Reference Genome used during alignment (fasta)" help="Reference genome (genome.fa) that corresponds to the *.bam file." />
+			</when>
 			<when value="indexed_filtered">
 				<param name="reference_genome" type="select" label="Reference Genome used during alignment (fasta)" >
 					<options from_file="all_fasta.loc">
@@ -116,20 +136,20 @@
 					</options>
 				</param>
 			</when>
-			<when value="history">
-				<param name="reference_genome" format="fasta" type="data" label="Reference Genome used during alignment (fasta)" help="Reference genome (genome.fa) that corresponds to the *.bam file." />
+			<when value="attribute">
+				<!-- Do nothing -->
 			</when>
-			
 		</conditional>
 		
 		<conditional name="extended_parameters_regions">
-			<param name="samtools_regions" type="select" label="VarScan parameters" help="For more advanced VarScan settings.">
+			<param name="samtools_regions" type="select" label="Region specific parameters" help="Let samtools target specific genomic locations.">
 				<option value="entire_genome">Entire genome</option>
 				<option value="region">Specific region</option>
 				<option value="regions_file_pos">Specific positions (file); list of positions</option>
 				<option value="regions_file_bed">Specific regions (file); list of regions in BED</option>
 			</param>
 			<when value="entire_genome">
+				<!-- Do nothing -->
 			</when>
 			<when value="region">
 				<param type="text" name="samtools_r" label="Samtools: region in which pileup is generated" help="chr:pos or chr:start-end" />
@@ -138,19 +158,27 @@
 				<param type="data" name="samtools_l" format="tabular" label="Samtools: list of positions (chr pos)" />
 			</when>
 			<when value="regions_file_bed">
-				<param type="data" name="samtools_l" format="bed"	 label="Samtools: specific regions (BED)" />
+				<param type="data" name="samtools_l" format="bed"     label="Samtools: specific regions (BED)" />
 			</when>
 		</conditional>
 		
-		<param type="integer" name="samtools_threads" value="8" min="1" label="Samtools: mpileup threads" />
+		<conditional name="mpileup_parallelization">
+			<param name="mpileup_parallelization_select" type="select" label="Use parallelization for the mpileup generation (experimental)" help="Especially if larger numbers of bam/sam files are processed, or the file infrastructure is optimized for IO-paralellization, this feature might improve performance.">
+				<option value="false" >False - uses classical samtools</option>
+				<option value="true">True - uses (experimental) samtools mpileup-parallel</option>
+			</param>
+			<when value="false" />
+			<when value="true">
+				<param type="integer" name="samtools_threads" value="2" min="1" label="Samtools: mpileup threads" />
+			</when>
+		</conditional>
 		
 		<conditional name="extended_parameters">
-			<param name="parameters" type="select" label="VarScan parameters" help="For more advanced VarScan settings.">
+			<param name="parameters" type="select" label="Advanced parameters" help="For more advanced VarScan and samtools settings.">
 				<option value="default">Default settings</option>
 				<option value="extended">Extended settings</option>
 			</param>
-			<when value="default">
-			</when>
+			<when value="default" />
 			<when value="extended">
 				<param type="boolean" name="samtools_6" falsevalue="" truevalue=" -6" label="Samtools: assume the quality is in the Illumina-1.3+ encoding" />
 				<param type="boolean" name="samtools_A" falsevalue="" truevalue=" -A" label="Samtools: count anomalous read pairs" />
@@ -171,7 +199,7 @@
 				<param type="integer" name="samtools_m" value="1"					 label="Samtools: minimum gapped reads for indel candidates [1]" help="Alias: -m" />
 				<param type="integer" name="samtools_o" value="40"					label="Samtools: Phred-scaled gap open sequencing error probability [40]" />
 				<param type="boolean" name="samtools_p" falsevalue="" truevalue=" -p" label="Samtools: apply -m and -F per-sample to increase sensitivity" />
-				<param type="text"	name="samtools_P" value="all"				   label="Samtools: comma separated list of platforms for indels [all]" />
+				<param type="text"    name="samtools_P" value="all"				   label="Samtools: comma separated list of platforms for indels [all]" />
 				
 				<param type="integer" name="varscan_min_coverage"	 value="8"	label="VarScan: Minimum read depth at a position to make a call [8]" />
 				<param type="integer" name="varscan_min_reads2"	   value="2"	label="VarScan: PMinimum supporting reads at a position to call variants [2]" />
@@ -233,6 +261,6 @@
 http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch
 
 More tools by the Translational Research IT (TraIT) project can be found in the following repository:
-http://toolshed.nbic.nl/
+http://toolshed.dtls.nl/
 </help>
 </tool>