Mercurial > repos > yhoogstrate > varscan_mpileup2snp_from_bam

--- a/varscan2_from_bam.xml	Tue Dec 17 05:03:38 2013 -0500
+++ b/varscan2_from_bam.xml	Tue Dec 17 05:23:00 2013 -0500
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <tool id="varscan2_from_bam" name="VarScan2 from BAM">
-	<description>VarScan2 reading a *.bam file to avoid unncessairy I/O overhead.</description>
+	<description>VarScan2 (1: directly reading a *.bam file, 2: using parallel mpileup generation) to avoid unnecessary I/O overhead.</description>
 	<requirements>
 		<requirement type="package" version="1.0.19">samtools-mpileup-parallel</requirement>
 		<requirement type="package" version="2.3.6">VarScan</requirement>
@@ -13,21 +13,88 @@
 			The following script is written in the "Cheetah" language:
 			http://www.cheetahtemplate.org/docs/users_guide_html_multipage/contents.html
 		-->
-		samtools-mpileup-parallel mpileup -t $threads -f $reference_genome
+
+		<!--
+			Usage: samtools mpileup [options] in1.bam [in2.bam [...]]
+
+			Input options:
+				   -6           assume the quality is in the Illumina-1.3+ encoding
+				   -A           count anomalous read pairs
+				   -B           disable BAQ computation
+				   -b FILE      list of input BAM filenames, one per line [null]
+				   -C INT       parameter for adjusting mapQ; 0 to disable [0]
+				   -d INT       max per-BAM depth to avoid excessive memory usage [250]
+				   -E           recalculate extended BAQ on the fly thus ignoring existing BQs
+				   -f FILE      faidx indexed reference sequence file [null]
+				   -G FILE      exclude read groups listed in FILE [null]
+				   -l FILE      list of positions (chr pos) or regions (BED) [null]
+				   -M INT       cap mapping quality at INT [60]
+				   -r STR       region in which pileup is generated [null]
+				   -R           ignore RG tags
+				   -q INT       skip alignments with mapQ smaller than INT [0]
+				   -Q INT       skip bases with baseQ/BAQ smaller than INT [13]
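+				   - -rf INT    required flags: skip reads with mask bits unset [] (double-hyphen option; written split because XML comments may not contain that sequence)
+				   - -ff INT    filter flags: skip reads with mask bits set [] (double-hyphen option; written split for the same reason)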
+				   -t INT       Number of parallel threads
+
+			Output options:
+				   -D           output per-sample DP in BCF (require -g/-u)
+				   -g           generate BCF output (genotype likelihoods)
+				   -O           output base positions on reads (disabled by -g/-u)
+				   -s           output mapping quality (disabled by -g/-u)
+				   -S           output per-sample strand bias P-value in BCF (require -g/-u)
+				   -u           generate uncompress BCF output
+
+			SNP/INDEL genotype likelihoods options (effective with `-g' or `-u'):
+				   -e INT       Phred-scaled gap extension seq error probability [20]
+				   -F FLOAT     minimum fraction of gapped reads for candidates [0.002]
+				   -h INT       coefficient for homopolymer errors [100]
+				   -I           do not perform indel calling
+				   -L INT       max per-sample depth for INDEL calling [250]
+				   -m INT       minimum gapped reads for indel candidates [1]
+				   -o INT       Phred-scaled gap open sequencing error probability [40]
+				   -p           apply -m and -F per-sample to increase sensitivity
+				   -P STR       comma separated list of platforms for indels [all]
+		-->
+
+		<!--
+			USAGE: java -jar VarScan.jar mpileup2cns [pileup file] OPTIONS
+			mpileup file - The SAMtools mpileup file
+
+			OPTIONS (each name below takes a double-hyphen prefix on the command line; it is omitted here because XML comments may not contain that sequence):
+			min-coverage	Minimum read depth at a position to make a call [8]
+			min-reads2	Minimum supporting reads at a position to call variants [2]
+			min-avg-qual	Minimum base quality at a position to count a read [15]
+			min-var-freq	Minimum variant allele frequency threshold [0.01]
+			min-freq-for-hom	Minimum frequency to call homozygote [0.75]
+			p-value	Default p-value threshold for calling variants [99e-02]
+			strand-filter	Ignore variants with >90% support on one strand [1]
+			output-vcf	If set to 1, outputs in VCF format
+			vcf-sample-list	For VCF output, a list of sample names in order, one per line
+			variants	Report only variant (SNP/indel) positions [0]
+		-->
+
+		samtools-mpileup-parallel mpileup -t $samtools_threads -f $reference_genome
 		#for $sample in $samples
 			 ${sample.mapped_reads}
 		#end for
-		 | java -jar VarScan.v2.2.jar pileup2snp > $output_table
+		 | java -jar VarScan.v2.3.6.jar mpileup2snp $varscan_vcf_output > $output_table
 	</command>

-	<input type="text" name="threads" value="8" min="1" />
-
 	<inputs>
 		<repeat name="samples" title="Samples" min="1">
 			<param format="bam,sam" name="mapped_reads" type="data" label="Alignment file" help="Mapped reads in BAM or SAM format."/>
 		</repeat>

 		<param format="fa,fasta" name="reference_genome" type="data" label="Gene Model Annotations" help="Reference genome (genome.fa) that corresponds to the *.bam file." />
+		<param type="text" name="region" label="Region in which pileup is generated, leave empty for entire genome" />
+		<!-- NOTE(review): $region is not referenced by the command lines shown in this change; confirm it is passed to samtools mpileup via -r. -->
+		<!-- samtools_threads is substituted after the -t flag of samtools-mpileup-parallel in the command template. -->
+		<param type="integer" name="samtools_threads" value="8" min="1" label="Samtools: mpileup threads" />
+
+
+		<!-- varscan_vcf_output expands to its truevalue/falsevalue string, which the command template appends to the VarScan mpileup2snp call. -->
+		<param type="boolean" name="varscan_vcf_output" falsevalue=" --output-vcf 0" truevalue=" --output-vcf 1" label="VarScan: VCF output" />
 	</inputs>

 	<outputs>