Mercurial > repos > devteam > vcftools_isec
changeset 0:799feac145c8 draft
Uploaded
author | devteam |
---|---|
date | Sun, 24 Nov 2013 11:12:03 -0500 |
parents | |
children | 9e576efe7836 |
files | test-data/1.vcf test-data/2.vcf test-data/out.vcf tool_dependencies.xml vcftools_isec.xml |
diffstat | 5 files changed, 127 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1.vcf Sun Nov 24 11:12:03 2013 -0500 @@ -0,0 +1,27 @@ +##fileformat=VCFv4.1 +##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position"> +##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand"> +##FORMAT=<ID=ABQ,Number=1,Type=Integer,Description="Average quality of variant-supporting bases (qual2)"> +##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)"> +##FORMAT=<ID=ADF,Number=1,Type=Integer,Description="Depth of variant-supporting bases on forward strand (reads2plus)"> +##FORMAT=<ID=ADR,Number=1,Type=Integer,Description="Depth of variant-supporting bases on reverse strand (reads2minus)"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Quality Read Depth of bases with Phred score >= 15"> +##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency"> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=PVAL,Number=1,Type=String,Description="P-value from Fisher's Exact Test"> +##FORMAT=<ID=RBQ,Number=1,Type=Integer,Description="Average quality of reference-supporting bases (qual1)"> +##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)"> +##FORMAT=<ID=RDF,Number=1,Type=Integer,Description="Depth of reference-supporting bases on forward strand (reads1plus)"> +##FORMAT=<ID=RDR,Number=1,Type=Integer,Description="Depth of reference-supporting bases on reverse strand (reads1minus)"> +##FORMAT=<ID=SDP,Number=1,Type=Integer,Description="Raw Read Depth as reported by SAMtools"> +##INFO=<ID=ADP,Number=1,Type=Integer,Description="Average per-sample depth of bases with Phred score >= 15"> +##INFO=<ID=HET,Number=1,Type=Integer,Description="Number of samples called heterozygous-variant"> 
+##INFO=<ID=HOM,Number=1,Type=Integer,Description="Number of samples called homozygous-variant"> +##INFO=<ID=NC,Number=1,Type=Integer,Description="Number of samples not called"> +##INFO=<ID=WT,Number=1,Type=Integer,Description="Number of samples called reference (wild-type)"> +##source=VarScan2 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample7 +chr1 14653 . C T . PASS ADP=30;HET=7;HOM=0;NC=0;WT=0 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 0/1:0:18:18:13:5:27.78%:9.8E-1:37:38:7:6:5:0 +chr1 14907 . A G . PASS ADP=18;HET=2;HOM=2;NC=1;WT=2 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 0/1:0:19:19:5:8:61.54%:9.8E-1:37:18:5:0:5:3 +chr1 14930 . A G . PASS ADP=19;HET=2;HOM=2;NC=1;WT=2 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 0/1:0:24:22:5:11:68.75%:9.8E-1:35:24:4:1:5:6
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/2.vcf Sun Nov 24 11:12:03 2013 -0500 @@ -0,0 +1,25 @@ +##fileformat=VCFv4.1 +##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position"> +##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand"> +##FORMAT=<ID=ABQ,Number=1,Type=Integer,Description="Average quality of variant-supporting bases (qual2)"> +##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)"> +##FORMAT=<ID=ADF,Number=1,Type=Integer,Description="Depth of variant-supporting bases on forward strand (reads2plus)"> +##FORMAT=<ID=ADR,Number=1,Type=Integer,Description="Depth of variant-supporting bases on reverse strand (reads2minus)"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Quality Read Depth of bases with Phred score >= 15"> +##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency"> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=PVAL,Number=1,Type=String,Description="P-value from Fisher's Exact Test"> +##FORMAT=<ID=RBQ,Number=1,Type=Integer,Description="Average quality of reference-supporting bases (qual1)"> +##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)"> +##FORMAT=<ID=RDF,Number=1,Type=Integer,Description="Depth of reference-supporting bases on forward strand (reads1plus)"> +##FORMAT=<ID=RDR,Number=1,Type=Integer,Description="Depth of reference-supporting bases on reverse strand (reads1minus)"> +##FORMAT=<ID=SDP,Number=1,Type=Integer,Description="Raw Read Depth as reported by SAMtools"> +##INFO=<ID=ADP,Number=1,Type=Integer,Description="Average per-sample depth of bases with Phred score >= 15"> +##INFO=<ID=HET,Number=1,Type=Integer,Description="Number of samples called heterozygous-variant"> 
+##INFO=<ID=HOM,Number=1,Type=Integer,Description="Number of samples called homozygous-variant"> +##INFO=<ID=NC,Number=1,Type=Integer,Description="Number of samples not called"> +##INFO=<ID=WT,Number=1,Type=Integer,Description="Number of samples called reference (wild-type)"> +##source=VarScan2 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample7 +chr1 14907 . A G . PASS ADP=18;HET=2;HOM=2;NC=1;WT=2 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 0/1:0:19:19:5:8:61.54%:9.8E-1:37:18:5:0:5:3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out.vcf Sun Nov 24 11:12:03 2013 -0500 @@ -0,0 +1,6 @@ +##fileformat=VCFv4.1 +##source_20130524.1=vcf-isec(r797) 1.vcf.header.gz 2.vcf.header.gz +##sourceFiles_20130524.1=0:1.vcf.header.gz,1:2.vcf.header.gz +##INFO=<ID=SF,Number=.,Type=String,Description="Source File (index to sourceFiles, f when filtered)"> +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 14907 . A G . PASS ADP=18;HET=2;HOM=2;NC=1;WT=2;SF=0,1 GT:GQ:SDP:DP:RD:AD:FREQ:PVAL:RBQ:ABQ:RDF:RDR:ADF:ADR 0/1:0:19:19:5:8:61.54%:9.8E-1:37:18:5:0:5:3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Sun Nov 24 11:12:03 2013 -0500
@@ -0,0 +1,10 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <!-- vcftools 0.1.11, taken from the devteam package repository at a pinned changeset. -->
+    <package name="vcftools" version="0.1.11">
+        <repository owner="devteam"
+                    name="package_vcftools_0_1_11"
+                    changeset_revision="4081df719fff"
+                    toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/vcftools_isec.xml Sun Nov 24 11:12:03 2013 -0500
@@ -0,0 +1,69 @@
+<tool id="vcftools_isec" name="Intersect" version="0.1">
+    <!-- Galaxy wrapper: sorts, bgzip-compresses and tabix-indexes every input VCF, then runs vcf-isec on the indexed files. -->
+    <description>multiple VCF datasets</description>
+
+    <requirements>
+        <requirement type="package">tabix</requirement>
+        <requirement type="package" version="0.1.11">vcftools</requirement>
+    </requirements>
+
+    <!-- CDATA lets the shell operators used below (&&, >) appear without entity escaping. -->
+    <command><![CDATA[
+        ## Preprocessing for each dataset. The steps are chained with && so that a
+        ## failed sort, compress or index step stops the job before vcf-isec runs.
+        #set dataset_names = []
+        #for i, $input in enumerate( $inputs ):
+            ## Sort file.
+            vcf-sort '${input.file}' > ${i}.vcf.sorted &&
+
+            ## Compress.
+            bgzip ${i}.vcf.sorted &&
+
+            ## Index.
+            tabix -p vcf ${i}.vcf.sorted.gz &&
+
+            ## Remember the indexed file name for the vcf-isec call below.
+            #silent dataset_names.append( str($i) + '.vcf.sorted.gz' )
+        #end for
+
+        ## Intersect.
+        vcf-isec -f
+        #if $complement:
+            -c
+        #end if
+        #echo ' '.join( dataset_names ) # > '${output}'
+    ]]></command>
+    <inputs>
+        <repeat name="inputs" title="Dataset" min="2">
+            <param name="file" label="Dataset" type="data" format="vcf"/>
+        </repeat>
+        <param name="complement" type="boolean" label="Complement intersection" help="If checked, output positions present in the first file but missing from the other files"/>
+    </inputs>
+
+    <outputs>
+        <data name="output" format="vcf"/>
+    </outputs>
+
+    <stdio>
+        <!-- Any non-zero exit status fails the job; the catch-all regex below only logs output and never marks a failure. -->
+        <exit_code range="1:" level="fatal" description="tool exited with a non-zero status"/>
+        <regex match=".*" source="both" level="log" description="tool progress"/>
+    </stdio>
+
+    <tests>
+        <!-- Cannot specify multiple repeats in test framework right now.
+        <test>
+            <param name='inputs|1' value='1.vcf' />
+            <param name='inputs|2' value='2.vcf' />
+            <param name='complement' value='False' />
+            <output name='output' file='out.vcf' />
+        </test>
+        -->
+    </tests>
+
+    <help>
+        Please see the VCFtools `documentation`__ for help and further information.
+
+        .. __: http://vcftools.sourceforge.net/docs.html
+    </help>
+</tool>