Mercurial > repos > yhoogstrate > varscan_mpileup2snp_from_bam

--- a/samtools-parallel-mpileup.xml	Tue Mar 04 07:50:19 2014 -0500
+++ b/samtools-parallel-mpileup.xml	Wed Mar 05 05:42:06 2014 -0500
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <tool id="samtools_parallel_mpileup" name="Samtools parallel mpileup">
-	<description>Samtools mpileup (classical or supporting parallelization).</description>
+	<description>Samtools mpileup (supporting parallelization)</description>
 	<requirements>
 		<requirement type="package" version="0.1.19">samtools-parallel-mpileup</requirement>
 		<requirement type="package" version="0.1.19">samtools</requirement>
@@ -66,6 +66,9 @@
 			#end for

 			 2> stderr_1.txt
+			#if $sort_mpileup == "true"
+			 | sort -k 1,1 -k 2,2
+			#end if
 			 > $output ;
 			 cat stderr_1.txt
 		#end if
@@ -84,10 +87,10 @@
 			</param>
 			<when value="indexed_filtered">
 				<param name="reference_genome" type="select" label="Reference Genome used during alignment (fasta)" >
-					<options from_file="all_fasta.loc">
-						<column name="name" index="0"/>
+					<options from_data_table="all_fasta">
+						<column name="name" index="2"/>
 						<column name="dbkey" index="1"/>
-						<column name="value" index="3"/>
+						<column name="value" index="3"/><!-- Value is the path of the fasta file -->
 						<filter type="data_meta" ref="alignments" multiple="false" key="dbkey" column="1" />
 						<validator type="no_options" message="No indexes are available for the selected input dataset" />
 					</options>
@@ -98,10 +101,10 @@
 			</when>
 			<when value="indexed_all">
 				<param name="reference_genome" type="select" label="Reference Genome used during alignment (fasta)" >
-					<options from_file="all_fasta.loc">
-						<column name="name" index="0"/>
+					<options from_data_table="all_fasta">
+						<column name="name"  index="2"/>
 						<column name="dbkey" index="1"/>
-						<column name="value" index="3"/>
+						<column name="value" index="3"/><!-- Value is the path of the fasta file -->
 						<validator type="no_options" message="No indexes are available for the selected input dataset" />
 					</options>
 				</param>
@@ -125,7 +128,7 @@
 				<param type="data" name="samtools_l" format="tabular" label="Samtools: list of positions (chr pos)" />
 			</when>
 			<when value="regions_file_bed">
-				<param type="data" name="samtools_l" format="bed"	 label="Samtools: specific regions (BED)" />
+				<param type="data" name="samtools_l" format="bed"     label="Samtools: specific regions (BED)" />
 			</when>
 		</conditional>

@@ -140,74 +143,107 @@
 			</when>
 		</conditional>

+		<param name="sort_mpileup" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Sort mpileup file" help="Because parallelization may disrupt the outputs order, sorting can be conveniet for e.g. testing. Notice that this function has only use in a limited number of situations but costs CPU usage. Only use it if its really neccesairy." /><!-- Checkbox; the command template compares this value with "true" -->
+
 		<conditional name="extended_parameters">
 			<param name="parameters" type="select" label="Advanced parameters" help="For more advanced VarScan and samtools settings.">
 				<option value="default">Default settings</option>
 				<option value="extended">Extended settings</option>
 			</param>
-			<when value="default">
-			</when>
+			<when value="default" />
 			<when value="extended">
 				<param type="boolean" name="samtools_6" falsevalue="" truevalue=" -6" label="Samtools: assume the quality is in the Illumina-1.3+ encoding" />
 				<param type="boolean" name="samtools_A" falsevalue="" truevalue=" -A" label="Samtools: count anomalous read pairs" />
 				<param type="boolean" name="samtools_B" falsevalue="" truevalue=" -B" label="Samtools: disable BAQ computation" />
-				<param type="integer" name="samtools_C" value="0"					 label="Samtools: parameter for adjusting mapQ; 0 to disable [0]" />
-				<param type="integer" name="samtools_d" value="250"				   label="Samtools: max per-BAM depth to avoid excessive memory usage [250]" />
+				<param type="integer" name="samtools_C" value="0"                     label="Samtools: parameter for adjusting mapQ; 0 to disable [0]" />
+				<param type="integer" name="samtools_d" value="250"                   label="Samtools: max per-BAM depth to avoid excessive memory usage [250]" />
 				<param type="boolean" name="samtools_E" falsevalue="" truevalue=" -E" label="Samtools: recalculate extended BAQ on the fly thus ignoring existing BQs" />
-				<param type="integer" name="samtools_M" value="60"					label="cap mapping quality at INT [60]" />
+				<param type="integer" name="samtools_M" value="60"                    label="cap mapping quality at INT [60]" />
 				<param type="boolean" name="samtools_R" falsevalue="" truevalue=" -R" label="Samtools: ignore RG tags" />
-				<param type="integer" name="samtools_q" value="0"					 label="Samtools: skip alignments with mapQ smaller than INT [0]" />
-				<param type="integer" name="samtools_Q" value="13"					label="Samtools: skip bases with baseQ/BAQ smaller than INT [13]" />
+				<param type="integer" name="samtools_q" value="0"                     label="Samtools: skip alignments with mapQ smaller than INT [0]" />
+				<param type="integer" name="samtools_Q" value="13"                    label="Samtools: skip bases with baseQ/BAQ smaller than INT [13]" />

-				<param type="integer" name="samtools_e" value="20"					label="Samtools: Phred-scaled gap extension seq error probability [20]" />
-				<param type="float"   name="samtools_F" value="0.002"				 label="Samtools: minimum fraction of gapped reads for candidates [0.002]" help="Alias: -F" />
-				<param type="integer" name="samtools_h" value="100"				   label="Samtools: coefficient for homopolymer errors [100]" />
+				<param type="integer" name="samtools_e" value="20"                    label="Samtools: Phred-scaled gap extension seq error probability [20]" />
+				<param type="float"   name="samtools_F" value="0.002"                 label="Samtools: minimum fraction of gapped reads for candidates [0.002]" help="Alias: -F" />
+				<param type="integer" name="samtools_h" value="100"                   label="Samtools: coefficient for homopolymer errors [100]" />
 				<param type="boolean" name="samtools_I" falsevalue="" truevalue=" -I" label="Samtools: do not perform indel calling" />
-				<param type="integer" name="samtools_L" value="250"				   label="Samtools: max per-sample depth for INDEL calling [250]" />
-				<param type="integer" name="samtools_m" value="1"					 label="Samtools: minimum gapped reads for indel candidates [1]" help="Alias: -m" />
-				<param type="integer" name="samtools_o" value="40"					label="Samtools: Phred-scaled gap open sequencing error probability [40]" />
+				<param type="integer" name="samtools_L" value="250"                   label="Samtools: max per-sample depth for INDEL calling [250]" />
+				<param type="integer" name="samtools_m" value="1"                     label="Samtools: minimum gapped reads for indel candidates [1]" help="Alias: -m" />
+				<param type="integer" name="samtools_o" value="40"                    label="Samtools: Phred-scaled gap open sequencing error probability [40]" />
 				<param type="boolean" name="samtools_p" falsevalue="" truevalue=" -p" label="Samtools: apply -m and -F per-sample to increase sensitivity" />
-				<param type="text"    name="samtools_P" value="all"				   label="Samtools: comma separated list of platforms for indels [all]" />
+				<param type="text"    name="samtools_P" value="all"                   label="Samtools: comma separated list of platforms for indels [all]" />
 			</when>
 		</conditional>
-
-		<param type="integer" name="samtools_threads" value="2" min="1" label="Samtools: mpileup threads" />
 	</inputs>

 	<outputs>
 		<data format="mpileup" name="output" label="${tool.name} on ${', '.join([ str(a.hid)+': '+a.name for a in $alignments ])}" />
 	</outputs>

+	<tests>
+		<test><!-- Use classical samtools -->
+			<param name="alignments" value="hg19_mutant.bam" dbkey="hg19" />
+			<param name="reference_genome_source.source_select" value="attribute" />
+			<param name="extended_parameters_regions.samtools_regions" value="entire_genome" />
+
+			<param name="mpileup_parallelization.mpileup_parallelization_select" value="false" />
+			<param name="sort_mpileup" value="true" />
+
+			<param name="extended_parameters.parameters" value="default" />
+
+
+			<output name="output" file="hg19_mutant.mpileup" />
+		</test>
+		<test><!-- Use parallelized samtools -->
+			<param name="alignments" value="hg19_mutant.bam" dbkey="hg19" />
+			<param name="reference_genome_source.source_select" value="attribute" />
+			<param name="extended_parameters_regions.samtools_regions" value="entire_genome" />
+
+			<param name="mpileup_parallelization.mpileup_parallelization_select" value="true" />
+			<param name="mpileup_parallelization.samtools_threads" value="2" />
+			<param name="sort_mpileup" value="true" />
+
+			<param name="extended_parameters.parameters" value="default" />
+
+
+			<output name="output" file="hg19_mutant.mpileup" />
+		</test>
+	</tests>
+
 	<help>
-VarScan2.3.6::
-
-*VarScan2 Overview*
-
-VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most operating systems.
-http://dx.doi.org/10.1101/gr.129684.111
-http://www.ncbi.nlm.nih.gov/pubmed/19542151
+**Samtools mpileup (supporting parallelization)**

-*VarScan* requires mpileup formatted input files, which are generally derived from BAM files. Since mpileup files can become humongous, the interim step of storing it is bypassed. Thus, in this wrapper one or multiple BAM/SAM files go in, get processed into a mpileup file and get directly linked to VarScan.
-The samtools package is not able to parallelize the mpileup generation which make it a very slow process.
-Other people were aware of this and have written a version that can do parallelization:
-https://github.com/mydatascience/parallel-mpileup
+SAM (Sequence Alignment/Map) format is a generic format for storing large nucleotide sequence alignments. SAM aims to be a format that:
+
+Is flexible enough to store all the alignment information generated by various alignment programs;
+Is simple enough to be easily generated by alignment programs or converted from existing alignment formats;
+Is compact in file size;
+Allows most of operations on the alignment to work on a stream without loading the whole alignment into memory;
+Allows the file to be indexed by genomic position to efficiently retrieve all reads aligning to a locus.
+SAM Tools provide various utilities for manipulating alignments in the SAM format, including sorting, merging, indexing and generating alignments in a per-position format.

-Consequently, when a BAM files gets processed by this wrapper, it's processed by *parallel-mpileup* before its send to VarScan.
+SAMtools is hosted by SourceForge.net. The project page is http://samtools.sourceforge.net/. The source code releases are available from the download page. You can check out the most recent source code from the github project page with:
+git clone git://github.com/samtools/samtools.git
+https://github.com/mydatascience/parallel-mpileup/

-.. _VarScan: http://varscan.sourceforge.net/
+Because samtools does not support parallelization of the mpileup command, the project was forked to include parallelization support:
+
+
+However, since the project seems to lack support and contains fatal bugs this project was continued at:
+https://github.com/yhoogstrate/parallel-mpileup/
+

 **Input formats**

-VarScan2 accepts sequencing alignments in the same, either SAM or BAM format (http://samtools.sourceforge.net/). The alignment files have to be linked to a reference genome by galaxy. This is indicated under every history item with e.g.: *"database: hg19"* for a link to hg19, or *"database: ?"* if the link is missing.
+Samtools accepts sequencing alignments in either SAM or BAM format (http://samtools.sourceforge.net/). The alignment files have to be linked to a reference genome by Galaxy. This is indicated under every history item with e.g.: *"database: hg19"* for a link to hg19, or *"database: ?"* if the link is missing.

 **Installation**

-Make sure your reference genomes are properly annotated in "tool-data/all_fasta.loc", and linked to the names of the reference used for alignment.
+The installation is fully automatic.

 **License**

-* VarScan2.3.6: Non-Profit Open Software License 3.0 (Non-Profit OSL 3.0)
-* parallel-mpileup: MIT License (https://github.com/mydatascience/parallel-mpileup/blob/master/samtools-0.1.19/COPYING)
+* parallel-mpileup: MIT License (https://github.com/yhoogstrate/parallel-mpileup/blob/master/samtools-0.1.19/COPYING)
 * samtool: MIT License


@@ -219,4 +255,4 @@
 More tools by the Translational Research IT (TraIT) project can be found in the following repository:
 http://toolshed.dtls.nl/
 </help>
-</tool>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/generate_reads.py	Wed Mar 05 05:42:06 2014 -0500
@@ -0,0 +1,118 @@
+#!/usr/bin/env python
+
+
+import random
+import math
+
+
+class Region:
+	"""Closed interval start..stop plus the sequence covering it; construction aborts when their lengths disagree."""
+	def __init__(self,start,stop,sequence):
+		self.start = start
+		self.stop = stop
+		self.sequence = sequence.strip().replace("\n","").replace(" ","")# newlines and spaces are layout, not bases
+		if(len(self.sequence) != self.getSpanningLength()):# exactly one base per spanned position is required
+			import sys
+			sys.exit("ERROR: sequence length: "+str(len(self.sequence))+", while spanning region is: "+str(self.getSpanningLength()))# message goes to stderr, exit status is 1
+
+	def getSpanningLength(self):
+		return abs(self.stop-self.start+1)# positions covered, both ends included
+
+class ReadSynthesizer:
+	"""Keeps the Region objects of one chromosome and prints synthetic reads tiled across them."""
+	def __init__(self,chromosome):
+		self.regions = []# Region objects, in the order they were added
+		self.chromosome = chromosome# printed as the third column of every read record
+
+	def addRegion(self,region):
+		self.regions.append(region)
+
+	def produceReads(self,readDensity = 1,read_length = 50):
+		"""
+		Prints one tab-separated record per read to stdout. Every start offset in the concatenated
+		region sequences that still fits read_length bases gets a randomly drawn number of copies.
+		"""
+		mRNA = self.getTotalmRNA()
+		spanning_length = self.getRegionSpanningLength()
+		n = spanning_length['total'] - read_length + 1# number of start offsets that fit a full read
+
+		j = 0# index of the region that contains the current start offset
+		k = 0# start offset inside region j
+
+		for i in range(n):
+			# copies at this offset: rounded log-normal draw with mu=log(readDensity) and sigma=0.5
+			dd = max(0,int(round(random.lognormvariate(math.log(readDensity),0.5))))# Notice this is NOT a binomial distribution!!
+
+			for d in range(dd):
+				sequence = mRNA[i:i+read_length]
+
+				if(random.randint(0,1) == 0):
+					strand = 0
+				else:
+					strand = 16
+				flag = strand + 0# either 0 or 16; nothing else is added to the flag
+
+				print "read_"+str(j)+"."+str(i)+"."+str(d)+"\t"+str(flag)+"\t"+self.chromosome+"\t"+str(self.regions[j].start + k)+"\t60\t"+self.getMappingString(read_length,j,k)+"\t*\t0\t0\t"+str(sequence.upper())+"\t*"
+
+			spanning_length['iter'][j] -= 1# bookkeeping only; the value is not read again in this method
+			if(k >= self.regions[j].getSpanningLength()-1):# was the last base of region j: continue in the next region
+				j += 1
+				k = 0
+			else:
+				k += 1
+
+	def getMappingString(self,length,j,offset):
+		"""Returns "<n>M" runs joined by "<gap>N" for a read of `length` bases starting `offset` bases into self.regions[j]."""
+		m = 0# bases counted in the current region so far
+		out = ""
+
+		for i in range(length):
+			k = i + offset# position of read base i inside the current region
+
+			if(k >= self.regions[j].getSpanningLength()):# base i is the first base of the next region
+				j += 1
+				out += str(m)+"M"
+				out += (str(self.regions[j].start - self.regions[j-1].stop-1))+"N"
+				m = 1
+
+				# Base i sits at position 0 of the new region, so later bases are shifted by -i.
+				# The former -k also subtracted the start offset and found a second boundary too late.
+				offset = -i
+			else:
+				m += 1
+
+		out += str(m) + "M"
+		return out
+
+	def getRegionSpanningLength(self):
+		length = {'total':0,'iter':[]}# 'iter': length of each region, 'total': their sum
+		for r in self.regions:
+			l = r.getSpanningLength()
+			length['iter'].append(l)
+			length['total'] += l
+		return length
+
+	def getTotalmRNA(self):
+		mRNA = ""# region sequences joined in the order the regions were added
+		for r in self.regions:
+			mRNA += r.sequence
+		return mRNA
+
+#rs = ReadSynthesizer('chr6')
+#rs.addRegion(Region(100,149,'ccaggactggtttctgtaagaaacagcaggagctgtggcagcggcgaaag'))
+#rs.addRegion(Region(151,152,'at'))
+#rs.produceReads(3,50)
+
+# Actual run: the four chr6 regions added below are tiled with reads printed by produceReads(3,50).
+rs = ReadSynthesizer('chr6')
+rs.addRegion(Region(154360546,154360969,'ccaggactggtttctgtaagaaacagcaggagctgtggcagcggcgaaaggaagcggctgaggcgcttggaacccgaaaagtctcggtgctcctggctacctcgcacagcggtgcccgcccggccgtcagtaccatggacagcagcgctgcccccacgaacgccagcaattgcactgatgccttggcgtactcaagttgctccccagcacccagccccggttcctgggtcaacttgtcccacttagatggcGacctgtccgacccatgcggtccgaaccgcaccgacctgggcgggagagacagcctgtgccctccgaccggcagtccctccatgatcacggccatcacgatcatggccctctactccatcgtgtgcgtggtggggctcttcggaaacttcctggtcatgtatgtgattgtcag'))
+rs.addRegion(Region(154410961,154411313,'atacaccaagatgaagactgccaccaacatctacattttcaaccttgctctggcagatgccttagccaccagtaccctgcccttccagagtgtgaattacctaatgggaacatggccatttggaaccatcctttgcaagatagtgatctccatagattactataacatgttcaccagcatattcaccctctgcaccatgagtgttgatcgatacattgcagtctgccaccctgtcaaggccttagatttccgtactccccgaaatgccaaaattatcaatgtctgcaactggatcctctcttcagccattggtcttcctgtaatgttcatggctacaacaaaatacaggcaag'))
+rs.addRegion(Region(154412087,154412607,'gttccatagattgtacactaacattctctcatccaacctggtactgggaaaacctgctgaagatctgtgttttcatcttcgccttcattatgccagtgctcatcattaccgtgtgctatggactgatgatcttgcgcctcaagagtgtccgcatgctctctggctccaaagaaaaggacaggaatcttcgaaggatcaccaggatggtgctggtggtggtggctgtgttcatcgtctgctggactcccattcacatttacgtcatcattaaagccttggttacaatcccagaaactacgttccagactgtttcttggcacttctgcattgctctaggttacacaaacagctgcctcaacccagtcctttatgcatttctggatgaaaacttcaaacgatgcttcagagagttctgtatcccaacctcttccaacattgagcaacaaaactccactcgaattcgtcagaacactagagaccacccctccacggccaatacagtggatagaactaatcatcag'))
+rs.addRegion(Region(154428600,154428787,'gtggaattgaacctggactgtcactgtgaaaatgcaaagccttggccactgagctacaatgcagggcagtctccatttcccttcccaggaagagtctagagcattaattttgagtttgcaaaggcttgtaactatttcatatgatttttagagctgactatgacatgaaccctaaaattcctgttccc'))
+rs.produceReads(3,50)
+
+
+
+
+
+
--- a/varscan_mpileup2snp.xml	Tue Mar 04 07:50:19 2014 -0500
+++ b/varscan_mpileup2snp.xml	Wed Mar 05 05:42:06 2014 -0500
@@ -38,18 +38,18 @@
 			<when value="default">
 			</when>
 			<when value="extended">
-				<param type="integer" name="varscan_min_coverage"	 value="8"	label="VarScan: Minimum read depth at a position to make a call [8]" />
-				<param type="integer" name="varscan_min_reads2"	   value="2"	label="VarScan: PMinimum supporting reads at a position to call variants [2]" />
-				<param type="integer" name="varscan_min_avg_qual"	 value="15"   label="VarScan: Minimum base quality at a position to count a read [15]" />
-				<param type="float"   name="varscan_min_var_freq"	 value="0.01" label="VarScan: minimum fraction of gapped reads for candidates [0.002]" help="Alias: -F" />
-				<param type="float"   name="varscan_min_freq_for_hom" value="0.75" label="VarScan: Minimum frequency to call homozygote [0.75]" />
-				<param type="float"   name="varscan_p_value"		  value="0.99" label="VarScan: Default p-value threshold for calling variants [99e-02]" />
-				<param type="boolean" name="varscan_strand_filter"	falsevalue=" --strand_filter 0" truevalue=" --strand_filter 1" checked="true" label="VarScan: Ignore variants with >90% support on one strand [1]" />
-				<param type="boolean" name="varscan_variants"		 falsevalue=" --variants 0"	  truevalue=" --variants 1"					 label="VarScan: Report only variant (SNP/indel) positions [0]" />
+				<param type="integer" name="varscan_min_coverage"     value="8"       label="VarScan: Minimum read depth at a position to make a call [8]" />
+				<param type="integer" name="varscan_min_reads2"	      value="2"       label="VarScan: PMinimum supporting reads at a position to call variants [2]" />
+				<param type="integer" name="varscan_min_avg_qual"     value="15"      label="VarScan: Minimum base quality at a position to count a read [15]" />
+				<param type="float"   name="varscan_min_var_freq"     value="0.01"    label="VarScan: Minimum variant allele frequency threshold [0.01]" help="Alias: --min-var-freq" />
+				<param type="float"   name="varscan_min_freq_for_hom" value="0.75"    label="VarScan: Minimum frequency to call homozygote [0.75]" />
+				<param type="float"   name="varscan_p_value"          value="0.99"    label="VarScan: Default p-value threshold for calling variants [99e-02]" />
+				<param type="boolean" name="varscan_strand_filter"    falsevalue=" --strand_filter 0" truevalue=" --strand_filter 1" checked="true"  label="VarScan: Ignore variants with >90% support on one strand [1]" />
+				<param type="boolean" name="varscan_variants"         falsevalue=" --variants 0"      truevalue=" --variants 1"      checked="false" label="VarScan: Report only variant (SNP/indel) positions [0]" />
 			</when>
 		</conditional>

-		<param type="boolean" name="varscan_output_vcf" falsevalue="0" truevalue="1" label="VarScan: If set to 1, outputs in VCF format" />
+		<param type="boolean" name="varscan_output_vcf" falsevalue="0" truevalue="1"  label="VarScan: If set to 1, outputs in VCF format" />
 	</inputs>

 	<outputs>
@@ -60,10 +60,18 @@
 		</data>
 	</outputs>

+	<tests>
+		<test>
+			<param name="mpileup_input" value="hg19_mutant.mpileup" dbkey="hg19" ftype="mpileup" />
+			<param name="extended_parameters.parameters" value="default" />
+			<param name="varscan_output_vcf" value="1" />
+
+			<output name="snv_output" file="hg19_mutant.vcf" />
+		</test>
+	</tests>
+
 	<help>
-VarScan2.3.6::
-
-*VarScan2 Overview*
+**VarScan 2.3.6**

 VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most operating systems.
 http://dx.doi.org/10.1101/gr.129684.111
@@ -100,4 +108,4 @@
 More tools by the Translational Research IT (TraIT) project can be found in the following repository:
 http://toolshed.dtls.nl/
 </help>
-</tool>
+</tool>
\ No newline at end of file
--- a/varscan_mpileup2snp_from_bam.xml	Tue Mar 04 07:50:19 2014 -0500
+++ b/varscan_mpileup2snp_from_bam.xml	Wed Mar 05 05:42:06 2014 -0500
@@ -84,7 +84,9 @@
 			#end if

 			 --output-vcf $varscan_output_vcf
-			 > $snv_output
+			#if $sort_mpileup == "true"
+			 | sort -k 1,1 -k 2,2
+			#end if
 			 2> stderr_2.txt ;

 			 echo "-------------------------[ mpileup generation ]-------------------------" ;
@@ -136,9 +138,7 @@
 					</options>
 				</param>
 			</when>
-			<when value="attribute">
-				<!-- Do nothing -->
-			</when>
+			<when value="attribute" />
 		</conditional>

 		<conditional name="extended_parameters_regions">
@@ -148,9 +148,7 @@
 				<option value="regions_file_pos">Specific positions (file); list of positions</option>
 				<option value="regions_file_bed">Specific regions (file); list of regions in BED</option>
 			</param>
-			<when value="entire_genome">
-				<!-- Do nothing -->
-			</when>
+			<when value="entire_genome" />
 			<when value="region">
 				<param type="text" name="samtools_r" label="Samtools: region in which pileup is generated" help="chr:pos or chr:start-end" />
 			</when>
@@ -173,6 +171,8 @@
 			</when>
 		</conditional>

+		<param name="sort_mpileup" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Sort mpileup file" help="Because parallelization may disrupt the outputs order, sorting can be conveniet for e.g. testing. Notice that this function has only use in a limited number of situations but costs CPU usage. Only use it if its really neccesairy." /><!-- Checkbox; the command template compares this value with "true" -->
+
 		<conditional name="extended_parameters">
 			<param name="parameters" type="select" label="Advanced parameters" help="For more advanced VarScan and samtools settings.">
 				<option value="default">Default settings</option>
@@ -183,36 +183,36 @@
 				<param type="boolean" name="samtools_6" falsevalue="" truevalue=" -6" label="Samtools: assume the quality is in the Illumina-1.3+ encoding" />
 				<param type="boolean" name="samtools_A" falsevalue="" truevalue=" -A" label="Samtools: count anomalous read pairs" />
 				<param type="boolean" name="samtools_B" falsevalue="" truevalue=" -B" label="Samtools: disable BAQ computation" />
-				<param type="integer" name="samtools_C" value="0"					 label="Samtools: parameter for adjusting mapQ; 0 to disable [0]" />
-				<param type="integer" name="samtools_d" value="250"				   label="Samtools: max per-BAM depth to avoid excessive memory usage [250]" />
+				<param type="integer" name="samtools_C" value="0"                     label="Samtools: parameter for adjusting mapQ; 0 to disable [0]" />
+				<param type="integer" name="samtools_d" value="250"                   label="Samtools: max per-BAM depth to avoid excessive memory usage [250]" />
 				<param type="boolean" name="samtools_E" falsevalue="" truevalue=" -E" label="Samtools: recalculate extended BAQ on the fly thus ignoring existing BQs" />
-				<param type="integer" name="samtools_M" value="60"					label="cap mapping quality at INT [60]" />
+				<param type="integer" name="samtools_M" value="60"                    label="cap mapping quality at INT [60]" />
 				<param type="boolean" name="samtools_R" falsevalue="" truevalue=" -R" label="Samtools: ignore RG tags" />
-				<param type="integer" name="samtools_q" value="0"					 label="Samtools: skip alignments with mapQ smaller than INT [0]" />
-				<param type="integer" name="samtools_Q" value="13"					label="Samtools: skip bases with baseQ/BAQ smaller than INT [13]" />
+				<param type="integer" name="samtools_q" value="0"                     label="Samtools: skip alignments with mapQ smaller than INT [0]" />
+				<param type="integer" name="samtools_Q" value="13"                    label="Samtools: skip bases with baseQ/BAQ smaller than INT [13]" />

-				<param type="integer" name="samtools_e" value="20"					label="Samtools: Phred-scaled gap extension seq error probability [20]" />
-				<param type="float"   name="samtools_F" value="0.002"				 label="Samtools: minimum fraction of gapped reads for candidates [0.002]" help="Alias: -F" />
-				<param type="integer" name="samtools_h" value="100"				   label="Samtools: coefficient for homopolymer errors [100]" />
+				<param type="integer" name="samtools_e" value="20"                    label="Samtools: Phred-scaled gap extension seq error probability [20]" />
+				<param type="float"   name="samtools_F" value="0.002"                 label="Samtools: minimum fraction of gapped reads for candidates [0.002]" help="Alias: -F" />
+				<param type="integer" name="samtools_h" value="100"                   label="Samtools: coefficient for homopolymer errors [100]" />
 				<param type="boolean" name="samtools_I" falsevalue="" truevalue=" -I" label="Samtools: do not perform indel calling" />
-				<param type="integer" name="samtools_L" value="250"				   label="Samtools: max per-sample depth for INDEL calling [250]" />
-				<param type="integer" name="samtools_m" value="1"					 label="Samtools: minimum gapped reads for indel candidates [1]" help="Alias: -m" />
-				<param type="integer" name="samtools_o" value="40"					label="Samtools: Phred-scaled gap open sequencing error probability [40]" />
+				<param type="integer" name="samtools_L" value="250"                   label="Samtools: max per-sample depth for INDEL calling [250]" />
+				<param type="integer" name="samtools_m" value="1"                     label="Samtools: minimum gapped reads for indel candidates [1]" help="Alias: -m" />
+				<param type="integer" name="samtools_o" value="40"                    label="Samtools: Phred-scaled gap open sequencing error probability [40]" />
 				<param type="boolean" name="samtools_p" falsevalue="" truevalue=" -p" label="Samtools: apply -m and -F per-sample to increase sensitivity" />
-				<param type="text"    name="samtools_P" value="all"				   label="Samtools: comma separated list of platforms for indels [all]" />
+				<param type="text"    name="samtools_P" value="all"                   label="Samtools: comma separated list of platforms for indels [all]" />

-				<param type="integer" name="varscan_min_coverage"	 value="8"	label="VarScan: Minimum read depth at a position to make a call [8]" />
-				<param type="integer" name="varscan_min_reads2"	   value="2"	label="VarScan: PMinimum supporting reads at a position to call variants [2]" />
-				<param type="integer" name="varscan_min_avg_qual"	 value="15"   label="VarScan: Minimum base quality at a position to count a read [15]" />
-				<param type="float"   name="varscan_min_var_freq"	 value="0.01" label="VarScan: minimum fraction of gapped reads for candidates [0.002]" help="Alias: -F" />
-				<param type="float"   name="varscan_min_freq_for_hom" value="0.75" label="VarScan: Minimum frequency to call homozygote [0.75]" />
-				<param type="float"   name="varscan_p_value"		  value="0.99" label="VarScan: Default p-value threshold for calling variants [99e-02]" />
-				<param type="boolean" name="varscan_strand_filter"	falsevalue=" --strand_filter 0" truevalue=" --strand_filter 1" checked="true" label="VarScan: Ignore variants with >90% support on one strand [1]" />
-				<param type="boolean" name="varscan_variants"		 falsevalue=" --variants 0"	  truevalue=" --variants 1"					 label="VarScan: Report only variant (SNP/indel) positions [0]" />
+				<param type="integer" name="varscan_min_coverage"     value="8"       label="VarScan: Minimum read depth at a position to make a call [8]" />
+				<param type="integer" name="varscan_min_reads2"	      value="2"       label="VarScan: PMinimum supporting reads at a position to call variants [2]" />
+				<param type="integer" name="varscan_min_avg_qual"     value="15"      label="VarScan: Minimum base quality at a position to count a read [15]" />
+				<param type="float"   name="varscan_min_var_freq"     value="0.01"    label="VarScan: Minimum variant allele frequency threshold [0.01]" help="Alias: --min-var-freq" />
+				<param type="float"   name="varscan_min_freq_for_hom" value="0.75"    label="VarScan: Minimum frequency to call homozygote [0.75]" />
+				<param type="float"   name="varscan_p_value"          value="0.99"    label="VarScan: Default p-value threshold for calling variants [99e-02]" />
+				<param type="boolean" name="varscan_strand_filter"    falsevalue=" --strand_filter 0" truevalue=" --strand_filter 1" checked="true"  label="VarScan: Ignore variants with >90% support on one strand [1]" />
+				<param type="boolean" name="varscan_variants"         falsevalue=" --variants 0"      truevalue=" --variants 1"      checked="false" label="VarScan: Report only variant (SNP/indel) positions [0]" />
 			</when>
 		</conditional>

-		<param type="boolean" name="varscan_output_vcf" falsevalue="0" truevalue="1" label="VarScan: If set to 1, outputs in VCF format" />
+		<param type="boolean" name="varscan_output_vcf" falsevalue="0" truevalue="1"  label="VarScan: If set to 1, outputs in VCF format" />
 	</inputs>

 	<outputs>
@@ -223,10 +223,40 @@
 		</data>
 	</outputs>

+	<tests>
+		<test><!-- Use classical samtools -->
+			<param name="alignments" value="hg19_mutant.bam" dbkey="hg19" ftype="bam" />
+			<param name="reference_genome_source.source_select" value="attribute" />
+			<param name="extended_parameters_regions.samtools_regions" value="entire_genome" />
+
+			<param name="mpileup_parallelization.mpileup_parallelization_select" value="false" />
+			<param name="sort_mpileup" value="true" />
+
+			<param name="extended_parameters.parameters" value="default" />
+			<param name="varscan_output_vcf" value="1" />
+
+
+			<output name="snv_output" file="hg19_mutant.vcf" />
+		</test>
+		<test><!-- Use parallelized samtools -->
+			<param name="alignments" value="hg19_mutant.bam" dbkey="hg19" ftype="bam" />
+			<param name="reference_genome_source.source_select" value="attribute" />
+			<param name="extended_parameters_regions.samtools_regions" value="entire_genome" />
+
+			<param name="mpileup_parallelization.mpileup_parallelization_select" value="true" />
+			<param name="mpileup_parallelization.samtools_threads" value="2" />
+			<param name="sort_mpileup" value="true" />
+
+			<param name="extended_parameters.parameters" value="default" />
+			<param name="varscan_output_vcf" value="1" />
+
+
+			<output name="snv_output" file="hg19_mutant.vcf" />
+		</test>
+	</tests>
+
 	<help>
-VarScan2.3.6::
-
-*VarScan2 Overview*
+**VarScan 2.3.6**

 VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most operating systems.
 http://dx.doi.org/10.1101/gr.129684.111
@@ -263,4 +293,4 @@
 More tools by the Translational Research IT (TraIT) project can be found in the following repository:
 http://toolshed.dtls.nl/
 </help>
-</tool>
+</tool>
\ No newline at end of file