Mercurial > repos > yhoogstrate > varscan_mpileup2snp_from_bam
changeset 10:29ac0abd267e
Uploaded
author | yhoogstrate |
---|---|
date | Tue, 17 Dec 2013 10:51:58 -0500 |
parents | bd04d35e443d |
children | dd96b65174df |
files | varscan2_from_bam.xml |
diffstat | 1 files changed, 115 insertions(+), 74 deletions(-) [+] |
line wrap: on
line diff
--- a/varscan2_from_bam.xml Tue Dec 17 06:36:29 2013 -0500 +++ b/varscan2_from_bam.xml Tue Dec 17 10:51:58 2013 -0500 @@ -5,80 +5,64 @@ <requirement type="package" version="1.0.19">samtools-mpileup-parallel</requirement> <requirement type="package" version="2.3.6">VarScan</requirement> </requirements> - <command interpreter="bash"> - <!-- - VarScan2 url: - http://downloads.sourceforge.net/project/varscan/VarScan.v2.3.6.jar?r=http%3A%2F%2Fsourceforge.net%2Fprojects%2Fvarscan%2Ffiles%2F&ts=1377264954&use_mirror=freefr - - The following script is written in the "Cheetah" language: - http://www.cheetahtemplate.org/docs/users_guide_html_multipage/contents.html - --> + <command> + samtools-mpileup-parallel mpileup + -t $samtools_threads + -f $reference_genome + + #if $extended_parameters_regions.samtools_regions == "region" + -r $extended_parameters_regions.$samtools_r + #elif $extended_parameters_regions.samtools_regions == "regions_file_pos" or $extended_parameters_regions.samtools_regions == "regions_file_bed" + -l $extended_parameters_regions.$samtools_l + #end if - <!-- - Usage: samtools mpileup [options] in1.bam [in2.bam [...]] - - Input options: - -6 assume the quality is in the Illumina-1.3+ encoding - -A count anomalous read pairs - -B disable BAQ computation - -b FILE list of input BAM filenames, one per line [null] - -C INT parameter for adjusting mapQ; 0 to disable [0] - -d INT max per-BAM depth to avoid excessive memory usage [250] - -E recalculate extended BAQ on the fly thus ignoring existing BQs - -f FILE faidx indexed reference sequence file [null] - -G FILE exclude read groups listed in FILE [null] - -l FILE list of positions (chr pos) or regions (BED) [null] - -M INT cap mapping quality at INT [60] - -r STR region in which pileup is generated [null] - -R ignore RG tags - -q INT skip alignments with mapQ smaller than INT [0] - -Q INT skip bases with baseQ/BAQ smaller than INT [13] - --rf INT required flags: skip reads with mask bits unset [] - --ff INT filter flags: skip reads with mask bits set [] - -t INT Number of parallel threads + #if $extended_parameters.parameters == "extended" + $extended_parameters.samtools_6 + $extended_parameters.samtools_A + $extended_parameters.samtools_B + -C $extended_parameters.samtools_C + -d $extended_parameters.samtools_d + $extended_parameters.samtools_E + -M $extended_parameters.samtools_M + $extended_parameters.samtools_R + -q $extended_parameters.samtools_q + -Q $extended_parameters.samtools_Q - Output options: - -D output per-sample DP in BCF (require -g/-u) - -g generate BCF output (genotype likelihoods) - -O output base positions on reads (disabled by -g/-u) - -s output mapping quality (disabled by -g/-u) - -S output per-sample strand bias P-value in BCF (require -g/-u) - -u generate uncompress BCF output - - SNP/INDEL genotype likelihoods options (effective with `-g' or `-u'): - -e INT Phred-scaled gap extension seq error probability [20] - -F FLOAT minimum fraction of gapped reads for candidates [0.002] - -h INT coefficient for homopolymer errors [100] - -I do not perform indel calling - -L INT max per-sample depth for INDEL calling [250] - -m INT minimum gapped reads for indel candidates [1] - -o INT Phred-scaled gap open sequencing error probability [40] - -p apply -m and -F per-sample to increase sensitivity - -P STR comma separated list of platforms for indels [all] - --> + -e $extended_parameters.samtools_e + -F $extended_parameters.samtools_F + -h $extended_parameters.samtools_h + $extended_parameters.samtools_I + -L $extended_parameters.samtools_L + -m $extended_parameters.samtools_m + -o $extended_parameters.samtools_o + $extended_parameters.samtools_p + -P $extended_parameters.samtools_P + #end if - <!-- - USAGE: java -jar VarScan.jar mpileup2cns [pileup file] OPTIONS - mpileup file - The SAMtools mpileup file - - OPTIONS: - --min-coverage Minimum read depth at a position to make a call [8] - --min-reads2 Minimum supporting reads at a position to call variants [2] - --min-avg-qual Minimum base quality at a position to count a read [15] - --min-var-freq Minimum variant allele frequency threshold [0.01] - --min-freq-for-hom Minimum frequency to call homozygote [0.75] - --p-value Default p-value threshold for calling variants [99e-02] - --strand-filter Ignore variants with >90% support on one strand [1] - --output-vcf If set to 1, outputs in VCF format - --vcf-sample-list For VCF output, a list of sample names in order, one per line - --variants Report only variant (SNP/indel) positions [0] - --> - - samtools-mpileup-parallel mpileup -t $samtools_threads -f $reference_genome #for $sample in $samples ${sample.mapped_reads} #end for - | java -jar VarScan.v2.3.6.jar mpileup2snp $varscan_vcf_output > $output_table + + | java + -Xmx64G + -jar VarScan.v2.3.6.jar + mpileup2snp + + #if $extended_parameters.parameters == "extended" + --min-coverage $varscan_min_coverage + --min-reads2 $varscan_min_reads2 + --min-avg-qual $varscan_min_avg_qual + --min-var-freq $varscan_min_var_freq + --min-freq-for-hom $varscan_min_freq_for_hom + --p-value $varscan_p_value + $varscan_strand_filter + $varscan_output_vcf + $varscan_variants + #end if + + $varscan_output_vcf + + > $output_table </command> <inputs> @@ -86,15 +70,72 @@ <param format="bam,sam" name="mapped_reads" type="data" label="Alignment file" help="Mapped reads in BAM or SAM format."/> </repeat> + <!-- Find out how to access the reference genome from the BAM file(s) --> <param format="fa,fasta" name="reference_genome" type="data" label="Gene Model Annotations" help="Reference genome (genome.fa) that corresponds to the *.bam file." /> - <input type="text" name="region" label="region in which pileup is generated, leave empy for entire genome" /> + <conditional name="extended_parameters_regions"> + <param name="samtools_regions" type="select" label="VarScan parameters" help="For more advanced VarScan settings."> + <option value="entire_genome">Entire genome</option> + <option value="region">Specific region</option> + <option value="regions_file_pos">Specific positions (file); list of positions</option> + <option value="regions_file_bed">Specific regions (file); list of regions in BED</option> + </param> + <when value="entire_genome"> + </when> + <when value="region"> + <param type="text" name="samtools_r" label="Samtools: region in which pileup is generated" help="chr:pos or chr:start-end" /> + </when> + <when value="regions_file_pos"> + <param type="data" name="samtools_l" format="tabular" label="Samtools: list of positions (chr pos)" /> + </when> + <when value="regions_file_bed"> + <param type="data" name="samtools_l" format="bed" label="Samtools: specific regions (BED)" /> + </when> + </conditional> + + <param type="integer" name="samtools_threads" value="8" min="1" label="Samtools: mpileup threads" /> - <input type="integer" name="samtools_threads" value="8" min="1" title="Samtools: mpileup threads" /> - + <conditional name="extended_parameters"> + <param name="parameters" type="select" label="VarScan parameters" help="For more advanced VarScan settings."> + <option value="default">Default settings</option> + <option value="extended">Extended settings</option> + </param> + <when value="default"> + </when> + <when value="extended"> + <param type="boolean" name="samtools_6" falsevalue="" truevalue=" -6" label="Samtools: assume the quality is in the Illumina-1.3+ encoding" /> + <param type="boolean" name="samtools_A" falsevalue="" truevalue=" -A" label="Samtools: count anomalous read pairs" /> + <param type="boolean" name="samtools_B" falsevalue="" truevalue=" -B" label="Samtools: disable BAQ computation" /> + <param type="integer" name="samtools_C" value="0" label="Samtools: parameter for adjusting mapQ; 0 to disable [0]" /> + <param type="integer" name="samtools_d" value="250" label="Samtools: max per-BAM depth to avoid excessive memory usage [250]" /> + <param type="boolean" name="samtools_E" falsevalue="" truevalue=" -E" label="Samtools: recalculate extended BAQ on the fly thus ignoring existing BQs" /> + <param type="integer" name="samtools_M" value="60" label="cap mapping quality at INT [60]" /> + <param type="boolean" name="samtools_R" falsevalue="" truevalue=" -R" label="Samtools: ignore RG tags" /> + <param type="integer" name="samtools_q" value="0" label="Samtools: skip alignments with mapQ smaller than INT [0]" /> + <param type="integer" name="samtools_Q" value="13" label="Samtools: skip bases with baseQ/BAQ smaller than INT [13]" /> + + <param type="integer" name="samtools_e" value="20" label="Samtools: Phred-scaled gap extension seq error probability [20]" /> + <param type="float" name="samtools_F" value="0.002" label="Samtools: minimum fraction of gapped reads for candidates [0.002]" help="Alias: -F" /> + <param type="integer" name="samtools_h" value="100" label="Samtools: coefficient for homopolymer errors [100]" /> + <param type="boolean" name="samtools_I" falsevalue="" truevalue=" -I" label="Samtools: do not perform indel calling" /> + <param type="integer" name="samtools_L" value="250" label="Samtools: max per-sample depth for INDEL calling [250]" /> + <param type="integer" name="samtools_m" value="1" label="Samtools: minimum gapped reads for indel candidates [1]" help="Alias: -m" /> + <param type="integer" name="samtools_o" value="40" label="Samtools: Phred-scaled gap open sequencing error probability [40]" /> + <param type="boolean" name="samtools_p" falsevalue="" truevalue=" -p" label="Samtools: apply -m and -F per-sample to increase sensitivity" /> + <param type="text" name="samtools_P" value="all" label="Samtools: comma separated list of platforms for indels [all]" /> + + <param type="integer" name="varscan_min_coverage" value="8" label="VarScan: Minimum read depth at a position to make a call [8]" /> + <param type="integer" name="varscan_min_reads2" value="2" label="VarScan: PMinimum supporting reads at a position to call variants [2]" /> + <param type="integer" name="varscan_min_avg_qual" value="15" label="VarScan: Minimum base quality at a position to count a read [15]" /> + <param type="float" name="varscan_min_var_freq" value="0.01" label="VarScan: minimum fraction of gapped reads for candidates [0.002]" help="Alias: -F" /> + <param type="float" name="varscan_min_freq_for_hom" value="0.75" label="VarScan: Minimum frequency to call homozygote [0.75]" /> + <param type="float" name="varscan_p_value" value="0.99" label="VarScan: Default p-value threshold for calling variants [99e-02]" /> + <param type="boolean" name="varscan_strand_filter" falsevalue=" --strand_filter 0" truevalue=" --strand_filter 1" checked="true" label="VarScan: Ignore variants with >90% support on one strand [1]" /> + <param type="boolean" name="varscan_variants" falsevalue=" --variants 0" truevalue=" --variants 1" label="VarScan: Report only variant (SNP/indel) positions [0]" /> + </when> + </conditional> - - <input type="boolean" name="varscan_vcf_output" falsevalue=" --output-vcf 0" truevalue=" --output-vcf 1" title="VarScan: VCF output" /> + <param type="boolean" name="varscan_output_vcf" falsevalue=" --output-vcf 0" truevalue=" --output-vcf 1" label="VarScan: If set to 1, outputs in VCF format" /> </inputs> <outputs> @@ -102,6 +143,6 @@ </outputs> <help> - VarScan2.3.6. + VarScan2.3.6 </help> </tool>