# HG changeset patch # User yhoogstrate # Date 1391764446 18000 # Node ID ea9a28803468bdb5dfd69ee4b135d570757e91b3 # Parent dc091c635cf0fcf2cc751368f03a1d7cb6d996e3 Uploaded diff -r dc091c635cf0 -r ea9a28803468 varscan2_snp_from_bam.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/varscan2_snp_from_bam.xml Fri Feb 07 04:14:06 2014 -0500 @@ -0,0 +1,218 @@ + + + VarScan2 SNP/SNV detection (1: directly reading *.bam file(s), 2: using parallel mpileup generation, to avoid unnecessary I/O overhead and increase performance). + + samtools-mpileup-parallel + VarScan + + + #if $reference_genome_source.source_select == "attribute" and len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) != 1 + echo "Invalid number of dbkeys are found: ${ len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) }, while only one should be used. Make sure that the alignments are done on the same reference genome and that 'tool-data/all_fasta.loc' is configured properly!" >&2 + #else + samtools-mpileup-parallel mpileup + -t $samtools_threads + -f + #if $reference_genome_source.source_select == "indexed" + "$reference_genome_source.reference_genome" + #else if $reference_genome_source.source_select == "history" + "$reference_genome_source.reference_genome" + #else + + "${ filter( lambda x: str( x[0] ) == str( { alignment.metadata.dbkey:True for alignment in $alignments }.keys()[0] ), $__app__.tool_data_tables[ 'all_fasta' ].get_fields() )[0][-1] }" + #end if + + #if $extended_parameters_regions.samtools_regions == "region" + -r $extended_parameters_regions.samtools_r + #elif $extended_parameters_regions.samtools_regions == "regions_file_pos" or $extended_parameters_regions.samtools_regions == "regions_file_bed" + -l $extended_parameters_regions.samtools_l + #end if + + #if $extended_parameters.parameters == "extended" + $extended_parameters.samtools_6 + $extended_parameters.samtools_A + $extended_parameters.samtools_B + -C 
$extended_parameters.samtools_C + -d $extended_parameters.samtools_d + $extended_parameters.samtools_E + -M $extended_parameters.samtools_M + $extended_parameters.samtools_R + -q $extended_parameters.samtools_q + -Q $extended_parameters.samtools_Q + + -e $extended_parameters.samtools_e + -F $extended_parameters.samtools_F + -h $extended_parameters.samtools_h + $extended_parameters.samtools_I + -L $extended_parameters.samtools_L + -m $extended_parameters.samtools_m + -o $extended_parameters.samtools_o + $extended_parameters.samtools_p + -P $extended_parameters.samtools_P + #end if + + #for $alignment in $alignments + ${alignment} + #end for + + | java + -Xmx64G + -jar VarScan.v2.3.6.jar + mpileup2snp + + #if $extended_parameters.parameters == "extended" + --min-coverage $varscan_min_coverage + --min-reads2 $varscan_min_reads2 + --min-avg-qual $varscan_min_avg_qual + --min-var-freq $varscan_min_var_freq + --min-freq-for-hom $varscan_min_freq_for_hom + --p-value $varscan_p_value + $varscan_strand_filter + $varscan_output_vcf + $varscan_variants + #end if + + $varscan_output_vcf + + > $output_table + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +VarScan2.3.6: + +**VarScan2 Overview** + +VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most operating systems. +http://dx.doi.org/10.1101/gr.129684.111 +http://www.ncbi.nlm.nih.gov/pubmed/19542151 + +*VarScan* requires mpileup formatted input files, which are generally derived from BAM files. Since mpileup files can become humongous, the interim step of storing them is bypassed. 
Thus, in this wrapper one or more BAM/SAM files go in, are converted to mpileup format, and are piped directly into VarScan. +The samtools package is not able to parallelize the mpileup generation, which makes it a very slow process. +Other people were aware of this and have written a version that can do parallelization: +https://github.com/mydatascience/parallel-mpileup + +Consequently, when a BAM file is processed by this wrapper, it is processed by *parallel-mpileup* before it is sent to VarScan. + +.. _VarScan: http://varscan.sourceforge.net/ + +**Input formats** + +VarScan2 accepts sequencing alignments that are all in the same format, either SAM or BAM (http://samtools.sourceforge.net/). The alignment files have to be linked to a reference genome by Galaxy. This is indicated under every history item with e.g.: *"database: hg19"* for a link to hg19, or *"database: ?"* if the link is missing. + +**Installation** + +Make sure your reference genomes are properly annotated in "tool-data/all_fasta.loc", and linked to the names of the reference used for alignment. + +**License** + +* VarScan2.3.6: Non-Profit Open Software License 3.0 (Non-Profit OSL 3.0) +* parallel-mpileup: MIT License (https://github.com/mydatascience/parallel-mpileup/blob/master/samtools-0.1.19/COPYING) + + +**Contact** + +The tool wrapper has been written by Youri Hoogstrate from the Erasmus Medical Center (Rotterdam, Netherlands) on behalf of the Translational Research IT (TraIT) project: +http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch + +