changeset 2:89702e7ec3c3 draft

Uploaded
author devteam
date Sat, 16 Nov 2013 18:48:12 -0500
parents 306a1fcf3369
children f82940004c84
files test-data/test_in1.pileup test-data/test_out1.vcf tool-dependencies.xml varscan_mpileup.xml
diffstat 4 files changed, 213 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_in1.pileup	Sat Nov 16 18:48:12 2013 -0500
@@ -0,0 +1,13 @@
+chr1	10087	a	41	..+1C...........,,,,,.....C..C,...,..,,..c,.+1C	A9D<<#C#<(C9ACAFF<?D>=(#;D#;AF=7898.GJ'6I
+chr1	10088	c	41	.......T.....,,,,,.........,..A,..,,..,,.	2BBB<#B#A5?5?GJ;JD?@A?;#6B#HEG)GBBIB>IEE!
+chr1	10089	c	41	.............,,,,,.........,...,..,,..,,.	8BBB<#D#?5A9AHJ;IFBBB??#6A#FEG8C=>H?FJ@C$
+chr1	10090	c	41	.............,,,,,.........,...,..,,..,,.	2#AB(#9#9(A<<GJEHEBBDA?#;;#D?H@5=CF1HJ;D'
+chr1	10091	t	41	.............,,,,,A........,...,..,,..,,.	(#(?5#,#<(?<?FIAF;(D=B9#;@#?#G8-=F=<CJ6BI
+chr1	10092	a	41	.............,,,,,.........,...,..,,..,,.	9#9C<#5#9,5(<FH:GE9C?<?#3A#:#A@B8CC5EI=?J
+chr1	10093	a	41	.............,,,,,.........,...c..,c..,,.	?#<DC#9#99?39CFEFC?D<(C#,C#1#HE'7G8'>I);J
+chr1	10094	c	41	.............,,,,,.....G...,...,..,,..,,T	?#?B?#?#<8A8ABJBIG9D?9<#,?#F#IHH(AIGEHGH6
+chr1	10095	c	41	A............,,,,,.........,...,..,,..,,.	##8B<#A#??B8<EJCJB7BAAA#5B#>#>=F(@HHCIBH6
+chr1	10096	c	41	.............,,,,,........A,...,..,,..,,.	##ADA#B#18B?AHI<IE<AA?D#<;#C#E6B=FGCCIBE9
+chr1	10097	t	41	.............,,,,,.........,..C,..,,..-1A,,-1a.	##(B8#B#8(BB?AFAH:(B2??#<?#8#E'6;;B6@J8@J
+chr1	10098	a	41	.............,,,,,.........,...,..,,.*,*.	##3C9#3#9999AFH2H;9D<C?#C3#?#E5?DAD@D!=$J
+chr1	10099	a	41	.............,,,,,.........,...c..,,..,,.	##<D<#9#<9?<9FD+DF9CAA?#39#1#H?)C>9.D!.$J
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out1.vcf	Sat Nov 16 18:48:12 2013 -0500
@@ -0,0 +1,26 @@
+##fileformat=VCFv4.1
+##source=VarScan2
+##INFO=<ID=ADP,Number=1,Type=Integer,Description="Average per-sample depth of bases with Phred score >= 15">
+##INFO=<ID=WT,Number=1,Type=Integer,Description="Number of samples called reference (wild-type)">
+##INFO=<ID=HET,Number=1,Type=Integer,Description="Number of samples called heterozygous-variant">
+##INFO=<ID=HOM,Number=1,Type=Integer,Description="Number of samples called homozygous-variant">
+##INFO=<ID=NC,Number=1,Type=Integer,Description="Number of samples not called">
+##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand">
+##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=SDP,Number=1,Type=Integer,Description="Raw Read Depth as reported by SAMtools">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Quality Read Depth of bases with Phred score >= 15">
+##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">
+##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">
+##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">
+##FORMAT=<ID=PVAL,Number=1,Type=String,Description="P-value from Fisher's Exact Test">
+##FORMAT=<ID=RBQ,Number=1,Type=Integer,Description="Average quality of reference-supporting bases (qual1)">
+##FORMAT=<ID=ABQ,Number=1,Type=Integer,Description="Average quality of variant-supporting bases (qual2)">
+##FORMAT=<ID=RDF,Number=1,Type=Integer,Description="Depth of reference-supporting bases on forward strand (reads1plus)">
+##FORMAT=<ID=RDR,Number=1,Type=Integer,Description="Depth of reference-supporting bases on reverse strand (reads1minus)">
+##FORMAT=<ID=ADF,Number=1,Type=Integer,Description="Depth of variant-supporting bases on forward strand (reads2plus)">
+##FORMAT=<ID=ADR,Number=1,Type=Integer,Description="Depth of variant-supporting bases on reverse strand (reads2minus)">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	Sample1
+chr1	10087	.	A	AC	.	PASS	ADP=33;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:41:33:31:2:6.06%:9.8E-1:30:32:22:9:2:0
+chr1	10097	.	TA	T	.	PASS	ADP=30;WT=0;HET=1;HOM=0;NC=0	GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR	0/1:0:41:30:28:2:6.67%:9.8E-1:29:36:18:10:1:1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-dependencies.xml	Sat Nov 16 18:48:12 2013 -0500
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="varscan" version="2.3.6">
+        <install version="1.0">
+            <actions>
+                <!-- Fetch the released jar; VarScan ships as a single file, so nothing is unpacked or compiled. -->
+                <action type="download_by_url">http://downloads.sourceforge.net/project/varscan/VarScan.v2.3.6.jar</action>
+                <!-- Keep the versioned file name: the tool command refers to VarScan.v2.3.6.jar literally. -->
+                <action type="move_file">
+                    <source>VarScan.v2.3.6.jar</source>
+                    <destination>$INSTALL_DIR/jars</destination>
+                </action>
+                <!-- The tool command locates the jar as $JAVA_JAR_PATH/VarScan.v2.3.6.jar. -->
+                <action type="set_environment">
+                    <environment_variable name="JAVA_JAR_PATH" action="set_to">$INSTALL_DIR/jars</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+This package downloads VarScan.v2.3.6.jar, places it in $INSTALL_DIR/jars and sets JAVA_JAR_PATH to that directory.
+The Varscan tool starts the jar with "java -jar", so a Java runtime must already be available on the PATH of the job
+environment; this package does not install one.
+
+VarScan home page: http://varscan.sourceforge.net/
+        </readme>
+    </package>
+</tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/varscan_mpileup.xml	Sat Nov 16 18:48:12 2013 -0500
@@ -0,0 +1,155 @@
+<tool id="varscan" name="Varscan" version="0.1">
+    <description>for variant detection</description>
+
+    <!--
+    Ties the tool to the "varscan" 2.3.6 package defined in tool-dependencies.xml, which sets JAVA_JAR_PATH.
+    NOTE(review): the Tool Shed conventionally reads a file named tool_dependencies.xml (underscore); confirm that
+    the hyphenated file name in this changeset is actually picked up.
+    -->
+    <requirements>
+        <requirement type="package" version="2.3.6">varscan</requirement>
+    </requirements>
+
+    <!--
+    Disabled: the version command string is not yet a template that can be filled in, so a version command is not
+    yet possible.
+    NOTE(review): the jar path below differs from the one the command section runs
+    ($JAVA_JAR_PATH/VarScan.v2.3.6.jar); align it before enabling this element.
+    <version_command>java -jar ${GALAXY_DATA_INDEX_DIR}/shared/jars/varscan/VarScan.jar 2>&amp;1 | head -n 1</version_command>
+    -->
+
+    <command>
+        ## Set up samples list file: split the comma-separated names into one name per line.
+        ## The value is double-quoted so the shell neither collapses whitespace inside a name
+        ## nor glob-expands characters such as * or ? against the job directory.
+        #if $sample_names.strip() != '':
+            echo "${sample_names}" | awk -F ',' '{ for (i = 1; i &lt;= NF; i++) { print \$i; } }' > samples_list.txt;
+        #end if
+
+        ## Set up command + input. The selected analysis type is the VarScan subcommand.
+        java -jar \$JAVA_JAR_PATH/VarScan.v2.3.6.jar ${cmd} "${input}"
+        --min-coverage ${min_coverage}
+        --min-reads2 ${min_supporting_reads}
+        --min-avg-qual ${min_avg_qual}
+        --min-var-freq ${min_var_freq}
+        --min-freq-for-hom ${min_freq_for_hom}
+        --p-value ${p_value}
+
+        ## NOTE(review): the flag is only sent for 'yes'. With 'no' nothing is passed, so VarScan's
+        ## own default applies (the help text lists it as [1]); confirm whether 'no' should send
+        ## "--strand-filter 0" instead.
+        #if str($strand_filter) == 'yes':
+          --strand-filter 1
+        #end if
+
+        ## Report only variants in consensus.
+        #if str($cmd) == 'mpileup2cns':
+          --variants
+        #end if
+
+        ## Use the names written above for the VCF sample columns.
+        #if $sample_names.strip() != '':
+            --vcf-sample-list samples_list.txt
+        #end if
+
+        ## Set up outputs. VarScan writes the VCF to standard output; the redirect is kept last so
+        ## that every option visibly precedes it.
+        --output-vcf 1 > "${output}"
+    </command>
+
+    <inputs>
+        <!-- Dataset handed to VarScan as its positional input argument. -->
+        <param format="pileup" name="input" type="data" label="Pileup dataset" help="Output of SAMtools mpileup"/>
+
+        <!-- Option values are VarScan subcommand names and are inserted verbatim into the command line. -->
+        <param name="cmd" type="select" label="Analysis type">
+          <option value="mpileup2snp" selected="True">single nucleotide variation</option>
+          <option value="mpileup2indel">insertions and deletions</option>
+          <option value="mpileup2cns">consensus genotype</option>
+        </param>
+
+        <!-- Calling thresholds; each one is passed to VarScan as a single command-line option. -->
+        <param name="min_coverage" type="integer" value="8" min="1" max="200" label="Minimum read depth" help="Minimum depth at a position to make a call"/>
+        <param name="min_supporting_reads" type="integer" value="2" min="1" max="200" label="Minimum supporting reads" help="Minimum supporting reads at a position to make a call"/>
+        <param name="min_avg_qual" type="integer" value="15" min="1" max="50" label="Minimum base quality at a position to count a read"/>
+        <param name="min_var_freq" type="float" value="0.01" min="0" max="1" label="Minimum variant allele frequency threshold"/>
+        <param name="min_freq_for_hom" type="float" value="0.75" min="0" max="1" label="Minimum frequency to call homozygote"/>
+        <param name="p_value" type="float" value="0.99" min="0" max="1" label="p-value threshold for calling variants"/>
+
+        <!-- Only the value "yes" adds the strand-filter option to the command line. -->
+        <param name="strand_filter" type="select" label="Ignore variants with >90% support on one strand">
+            <option value="no" selected="True">no</option>
+            <option value="yes">yes</option>
+        </param>
+
+        <!-- Optional. A non-blank value is split on commas and written to samples_list.txt by the command. -->
+        <param name="sample_names" type="text" value="" label="Sample names" help="Separate sample names by comma; leave blank to use default sample names."/>
+    </inputs>
+
+    <stdio>
+        <!-- A non-zero exit status fails the job even when the output never contains the word "Exception". -->
+        <exit_code range="1:" level="fatal" description="VarScan exited with a non-zero status"/>
+        <!-- Any "Exception" text on either stream is fatal. -->
+        <regex match="Exception" source="both" level="fatal" description="Tool exception"/>
+        <!-- All remaining output is recorded as log output rather than treated as an error. -->
+        <regex match=".*" source="both" level="log" description="tool progress"/>
+    </stdio>
+
+    <!-- Single VCF dataset; the command redirects VarScan's standard output into it. -->
+    <outputs>
+        <data name="output" format="vcf"/>
+    </outputs>
+
+    <!-- NOTE(review): intentionally empty as far as this file shows; presumably the element's presence alone
+         makes the tool available from Trackster. Confirm before removing it. -->
+    <trackster_conf>
+    </trackster_conf>
+
+    <tests>
+        <!-- Consensus calling (mpileup2cns) over the 13-position test pileup, with every threshold set to the same
+             value as its form default, no strand filter and no custom sample names. The result must match
+             test_out1.vcf exactly (lines_diff="0"); that file holds two calls, an insertion at chr1:10087 and a
+             deletion at chr1:10097. -->
+        <test>
+            <param name="input" value="test_in1.pileup" />
+            <param name="cmd" value="mpileup2cns" />
+            <param name="min_coverage" value="8" />
+            <param name="min_supporting_reads" value="2" />
+            <param name="min_avg_qual" value="15" />
+            <param name="min_var_freq" value="0.01" />
+            <param name="min_freq_for_hom" value="0.75" />
+            <param name="p_value" value="0.99" />
+            <param name="strand_filter" value="no" />
+            <param name="sample_names" value="" />
+            <output name="output" file="test_out1.vcf" lines_diff="0" />
+        </test>
+    </tests>
+
+    <help>
+**VarScan Overview**
+
+VarScan_ performs variant detection for massively parallel sequencing data, such as exome, WGS, and transcriptome data. It calls variants from an mpileup dataset and produces a VCF 4.1 dataset. Full documentation is available online_.
+
+Please cite: Koboldt, D., Zhang, Q., Larson, D., Shen, D., McLellan, M., Lin, L., Miller, C., Mardis, E., Ding, L., and Wilson, R. (2012). VarScan 2: Somatic mutation and copy number alteration discovery in cancer by exome sequencing. Genome Research. DOI: 10.1101/gr.129684.111
+
+.. _VarScan: http://varscan.sourceforge.net/
+.. _online: http://varscan.sourceforge.net/using-varscan.html
+
+**Input**
+
+::
+
+  mpileup file - The SAMtools mpileup file
+ 
+
+**Output**
+
+VarScan produces a VCF 4.1 dataset as output.
+
+**Parameters**
+
+::
+
+  analysis type
+    single nucleotide variation     Identify SNPs from an mpileup file
+    insertions and deletions        Identify indels from an mpileup file
+    consensus genotype              Call consensus and variants from an mpileup file
+
+  min-coverage  
+    Minimum read depth at a position to make a call [8]
+
+  min-reads2    
+    Minimum supporting reads at a position to call variants [2]
+
+  min-avg-qual  
+    Minimum base quality at a position to count a read [15]
+
+  min-var-freq  
+    Minimum variant allele frequency threshold [0.01]
+
+  min-freq-for-hom
+    Minimum frequency to call homozygote [0.75]
+  
+  p-value
+    Default p-value threshold for calling variants [0.99]
+  
+  strand-filter
+    Ignore variants with >90% support on one strand [1]
+  
+  output-vcf
+    If set to 1, outputs in VCF format
+  
+  vcf-sample-list
+    For VCF output, a list of sample names in order, one per line
+  
+  variants
+    Report only variant (SNP/indel) positions [0]
+
+    </help>
+</tool>