changeset 10:29ac0abd267e

Uploaded
author yhoogstrate
date Tue, 17 Dec 2013 10:51:58 -0500
parents bd04d35e443d
children dd96b65174df
files varscan2_from_bam.xml
diffstat 1 files changed, 115 insertions(+), 74 deletions(-) [+]
line wrap: on
line diff
--- a/varscan2_from_bam.xml	Tue Dec 17 06:36:29 2013 -0500
+++ b/varscan2_from_bam.xml	Tue Dec 17 10:51:58 2013 -0500
@@ -5,80 +5,64 @@
 		<requirement type="package" version="1.0.19">samtools-mpileup-parallel</requirement>
 		<requirement type="package" version="2.3.6">VarScan</requirement>
 	</requirements>
-	<command interpreter="bash">
-		<!--
-			VarScan2 url:
-			http://downloads.sourceforge.net/project/varscan/VarScan.v2.3.6.jar?r=http%3A%2F%2Fsourceforge.net%2Fprojects%2Fvarscan%2Ffiles%2F&ts=1377264954&use_mirror=freefr
-			
-			The following script is written in the "Cheetah" language:
-			http://www.cheetahtemplate.org/docs/users_guide_html_multipage/contents.html
-		-->
+	<command>
+		samtools-mpileup-parallel mpileup
+			-t $samtools_threads
+			-f $reference_genome
+		## Optional pileup restriction: the region string and the positions/BED file are nested in the "extended_parameters_regions" conditional.
+		#if $extended_parameters_regions.samtools_regions == "region"
+			-r $extended_parameters_regions.samtools_r
+		#elif $extended_parameters_regions.samtools_regions == "regions_file_pos" or $extended_parameters_regions.samtools_regions == "regions_file_bed"
+			-l $extended_parameters_regions.samtools_l
+		#end if
 		
-		<!--
-			Usage: samtools mpileup [options] in1.bam [in2.bam [...]]
-			
-			Input options:
-				   -6           assume the quality is in the Illumina-1.3+ encoding
-				   -A           count anomalous read pairs
-				   -B           disable BAQ computation
-				   -b FILE      list of input BAM filenames, one per line [null]
-				   -C INT       parameter for adjusting mapQ; 0 to disable [0]
-				   -d INT       max per-BAM depth to avoid excessive memory usage [250]
-				   -E           recalculate extended BAQ on the fly thus ignoring existing BQs
-				   -f FILE      faidx indexed reference sequence file [null]
-				   -G FILE      exclude read groups listed in FILE [null]
-				   -l FILE      list of positions (chr pos) or regions (BED) [null]
-				   -M INT       cap mapping quality at INT [60]
-				   -r STR       region in which pileup is generated [null]
-				   -R           ignore RG tags
-				   -q INT       skip alignments with mapQ smaller than INT [0]
-				   -Q INT       skip bases with baseQ/BAQ smaller than INT [13]
-				   --rf INT     required flags: skip reads with mask bits unset []
-				   --ff INT     filter flags: skip reads with mask bits set []
-				   -t INT       Number of parallel threads
+		#if $extended_parameters.parameters == "extended"
+			$extended_parameters.samtools_6 ## mpileup input options: boolean flags render as their switch (with a leading space) or as nothing
+			$extended_parameters.samtools_A
+			$extended_parameters.samtools_B
+			 -C $extended_parameters.samtools_C
+			 -d $extended_parameters.samtools_d
+			$extended_parameters.samtools_E
+			 -M $extended_parameters.samtools_M
+			$extended_parameters.samtools_R
+			 -q $extended_parameters.samtools_q
+			 -Q $extended_parameters.samtools_Q
 			
-			Output options:
-				   -D           output per-sample DP in BCF (require -g/-u)
-				   -g           generate BCF output (genotype likelihoods)
-				   -O           output base positions on reads (disabled by -g/-u)
-				   -s           output mapping quality (disabled by -g/-u)
-				   -S           output per-sample strand bias P-value in BCF (require -g/-u)
-				   -u           generate uncompress BCF output
-			
-			SNP/INDEL genotype likelihoods options (effective with `-g' or `-u'):
-				   -e INT       Phred-scaled gap extension seq error probability [20]
-				   -F FLOAT     minimum fraction of gapped reads for candidates [0.002]
-				   -h INT       coefficient for homopolymer errors [100]
-				   -I           do not perform indel calling
-				   -L INT       max per-sample depth for INDEL calling [250]
-				   -m INT       minimum gapped reads for indel candidates [1]
-				   -o INT       Phred-scaled gap open sequencing error probability [40]
-				   -p           apply -m and -F per-sample to increase sensitivity
-				   -P STR       comma separated list of platforms for indels [all]
-		-->
+			 -e $extended_parameters.samtools_e ## from here on: options the samtools usage text lists as effective with -g or -u
+			 -F $extended_parameters.samtools_F
+			 -h $extended_parameters.samtools_h
+			$extended_parameters.samtools_I
+			 -L $extended_parameters.samtools_L
+			 -m $extended_parameters.samtools_m
+			 -o $extended_parameters.samtools_o
+			$extended_parameters.samtools_p
+			 -P $extended_parameters.samtools_P
+		#end if
 		
-		<!--
-			USAGE: java -jar VarScan.jar mpileup2cns [pileup file] OPTIONS
-			mpileup file - The SAMtools mpileup file
-			
-			OPTIONS:
-			--min-coverage	Minimum read depth at a position to make a call [8]
-			--min-reads2	Minimum supporting reads at a position to call variants [2]
-			--min-avg-qual	Minimum base quality at a position to count a read [15]
-			--min-var-freq	Minimum variant allele frequency threshold [0.01]
-			--min-freq-for-hom	Minimum frequency to call homozygote [0.75]
-			--p-value	Default p-value threshold for calling variants [99e-02]
-			--strand-filter	Ignore variants with >90% support on one strand [1]
-			--output-vcf	If set to 1, outputs in VCF format
-			--vcf-sample-list	For VCF output, a list of sample names in order, one per line
-			--variants	Report only variant (SNP/indel) positions [0]
-		-->
-		
-		samtools-mpileup-parallel mpileup -t $samtools_threads -f $reference_genome
 		#for $sample in $samples
 			 ${sample.mapped_reads}
 		#end for
-		 | java -jar VarScan.v2.3.6.jar mpileup2snp $varscan_vcf_output > $output_table
+		
+		 | java
+				 -Xmx64G
+				 -jar VarScan.v2.3.6.jar
+					 mpileup2snp
+		## The thresholds and filters below are defined inside the "extended_parameters" conditional, so they are referenced through it.
+		#if $extended_parameters.parameters == "extended"
+				 --min-coverage     $extended_parameters.varscan_min_coverage
+				 --min-reads2       $extended_parameters.varscan_min_reads2
+				 --min-avg-qual     $extended_parameters.varscan_min_avg_qual
+				 --min-var-freq     $extended_parameters.varscan_min_var_freq
+				 --min-freq-for-hom $extended_parameters.varscan_min_freq_for_hom
+				 --p-value          $extended_parameters.varscan_p_value
+				$extended_parameters.varscan_strand_filter
+				$extended_parameters.varscan_variants
+		#end if
+		## varscan_output_vcf is a top-level parameter: it is emitted exactly once, for default and extended settings alike.
+		
+		$varscan_output_vcf
+		 
+		 > $output_table
 	</command>
 	
 	<inputs>
@@ -86,15 +70,72 @@
 			<param format="bam,sam" name="mapped_reads" type="data" label="Alignment file" help="Mapped reads in BAM or SAM format."/>
 		</repeat>
 		
+		<!-- Find out how to access the reference genome from the BAM file(s) -->
 		<param format="fa,fasta" name="reference_genome" type="data" label="Gene Model Annotations" help="Reference genome (genome.fa) that corresponds to the *.bam file." />
-		<input type="text" name="region" label="region in which pileup is generated, leave empy for entire genome" />
 		
+		<!-- Optional restriction of the pileup: entire genome, one region (mpileup -r), or a file of positions/regions (mpileup -l). -->
+		<conditional name="extended_parameters_regions">
+			<param name="samtools_regions" type="select" label="Samtools: region to pile up" help="Restrict the pileup to a single region, or to the positions/regions listed in a file.">
+				<option value="entire_genome">Entire genome</option>
+				<option value="region">Specific region</option>
+				<option value="regions_file_pos">Specific positions (file); list of positions</option>
+				<option value="regions_file_bed">Specific regions (file); list of regions in BED</option>
+			</param>
+			<when value="entire_genome" />
+			<when value="region">
+				<param type="text" name="samtools_r" label="Samtools: region in which pileup is generated" help="chr:pos or chr:start-end" />
+			</when>
+			<when value="regions_file_pos">
+				<param type="data" name="samtools_l" format="tabular" label="Samtools: list of positions (chr pos)" />
+			</when>
+			<when value="regions_file_bed">
+				<param type="data" name="samtools_l" format="bed"     label="Samtools: specific regions (BED)" />
+			</when>
+		</conditional>
+		
+		<param type="integer" name="samtools_threads" value="8" min="1" label="Samtools: mpileup threads" /> <!-- rendered as the -t argument of samtools-mpileup-parallel -->
 		
-		<input type="integer" name="samtools_threads" value="8" min="1" title="Samtools: mpileup threads" />
-		
+		<!-- Extended settings expose the samtools mpileup and VarScan options; boolean true/false values carry a leading space so the command template can emit them as-is. -->
+		<conditional name="extended_parameters">
+			<param name="parameters" type="select" label="Samtools / VarScan parameters" help="Extended settings expose the samtools mpileup and VarScan options.">
+				<option value="default">Default settings</option>
+				<option value="extended">Extended settings</option>
+			</param>
+			<when value="default" />
+			<when value="extended">
+				<param type="boolean" name="samtools_6" falsevalue="" truevalue=" -6" label="Samtools: assume the quality is in the Illumina-1.3+ encoding" />
+				<param type="boolean" name="samtools_A" falsevalue="" truevalue=" -A" label="Samtools: count anomalous read pairs" />
+				<param type="boolean" name="samtools_B" falsevalue="" truevalue=" -B" label="Samtools: disable BAQ computation" />
+				<param type="integer" name="samtools_C" value="0"                     label="Samtools: parameter for adjusting mapQ; 0 to disable [0]" />
+				<param type="integer" name="samtools_d" value="250"                   label="Samtools: max per-BAM depth to avoid excessive memory usage [250]" />
+				<param type="boolean" name="samtools_E" falsevalue="" truevalue=" -E" label="Samtools: recalculate extended BAQ on the fly thus ignoring existing BQs" />
+				<param type="integer" name="samtools_M" value="60"                    label="Samtools: cap mapping quality at INT [60]" />
+				<param type="boolean" name="samtools_R" falsevalue="" truevalue=" -R" label="Samtools: ignore RG tags" />
+				<param type="integer" name="samtools_q" value="0"                     label="Samtools: skip alignments with mapQ smaller than INT [0]" />
+				<param type="integer" name="samtools_Q" value="13"                    label="Samtools: skip bases with baseQ/BAQ smaller than INT [13]" />
+				
+				<param type="integer" name="samtools_e" value="20"                    label="Samtools: Phred-scaled gap extension seq error probability [20]" />
+				<param type="float"   name="samtools_F" value="0.002"                 label="Samtools: minimum fraction of gapped reads for candidates [0.002]" help="Alias: -F" />
+				<param type="integer" name="samtools_h" value="100"                   label="Samtools: coefficient for homopolymer errors [100]" />
+				<param type="boolean" name="samtools_I" falsevalue="" truevalue=" -I" label="Samtools: do not perform indel calling" />
+				<param type="integer" name="samtools_L" value="250"                   label="Samtools: max per-sample depth for INDEL calling [250]" />
+				<param type="integer" name="samtools_m" value="1"                     label="Samtools: minimum gapped reads for indel candidates [1]" help="Alias: -m" />
+				<param type="integer" name="samtools_o" value="40"                    label="Samtools: Phred-scaled gap open sequencing error probability [40]" />
+				<param type="boolean" name="samtools_p" falsevalue="" truevalue=" -p" label="Samtools: apply -m and -F per-sample to increase sensitivity" />
+				<param type="text"    name="samtools_P" value="all"                   label="Samtools: comma separated list of platforms for indels [all]" />
+				
+				<param type="integer" name="varscan_min_coverage"     value="8"    label="VarScan: Minimum read depth at a position to make a call [8]" />
+				<param type="integer" name="varscan_min_reads2"       value="2"    label="VarScan: Minimum supporting reads at a position to call variants [2]" />
+				<param type="integer" name="varscan_min_avg_qual"     value="15"   label="VarScan: Minimum base quality at a position to count a read [15]" />
+				<param type="float"   name="varscan_min_var_freq"     value="0.01" label="VarScan: Minimum variant allele frequency threshold [0.01]" help="Alias: --min-var-freq" />
+				<param type="float"   name="varscan_min_freq_for_hom" value="0.75" label="VarScan: Minimum frequency to call homozygote [0.75]" />
+				<param type="float"   name="varscan_p_value"          value="0.99" label="VarScan: Default p-value threshold for calling variants [99e-02]" />
+				<param type="boolean" name="varscan_strand_filter"    falsevalue=" --strand-filter 0" truevalue=" --strand-filter 1" checked="true" label="VarScan: Ignore variants with >90% support on one strand [1]" />
+				<param type="boolean" name="varscan_variants"         falsevalue=" --variants 0"      truevalue=" --variants 1"                     label="VarScan: Report only variant (SNP/indel) positions [0]" />
+			</when>
+		</conditional>
 		
-		
-		<input type="boolean" name="varscan_vcf_output" falsevalue=" --output-vcf 0" truevalue=" --output-vcf 1" title="VarScan: VCF output" />
+		<param type="boolean" name="varscan_output_vcf" falsevalue=" --output-vcf 0" truevalue=" --output-vcf 1" label="VarScan: output in VCF format" /> <!-- top-level on purpose: the command template emits it for default and extended settings alike -->
 	</inputs>
 	
 	<outputs>
@@ -102,6 +143,6 @@
 	</outputs>
 	
 	<help>
-		VarScan2.3.6.
+		VarScan2.3.6
 	</help>
 </tool>