changeset 0:490e605d1f44 draft default tip

planemo upload for repository https://github.com/atks/vt commit 5f1e53104d11817b9f1f93c4df17b77c80bd7472-dirty
author bgruening
date Sat, 04 Jun 2016 12:45:04 -0400
parents
children
files readme.rst test-data/20.fa.bz2 test-data/decompose_result01.vcf test-data/decompose_result02.vcf test-data/infile01.vcf test-data/infile02.vcf test-data/normalize_result01.vcf tool-data/fasta_indexes.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml vt_decompose.xml vt_macros.xml
diffstat 12 files changed, 838 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.rst	Sat Jun 04 12:45:04 2016 -0400
@@ -0,0 +1,52 @@
+===========================================
+Galaxy wrapper for VT, the Variant Tool Set
+===========================================
+
+A tool set for short variant discovery in genetic sequence data.
+
+http://genome.sph.umich.edu/wiki/vt
+
+============
+Installation
+============
+
+It is recommended to install this wrapper via the `Galaxy Tool Shed`_.
+
+.. _`Galaxy Tool Shed`:  https://testtoolshed.g2.bx.psu.edu/view/iuc/vt
+
+
+=======
+History
+=======
+- 0.1: Initial public release
+
+
+====================
+Detailed description
+====================
+
+View the original VT documentation: http://genome.sph.umich.edu/wiki/vt
+
+
+===============================
+Wrapper Licence (MIT/BSD style)
+===============================
+
+Permission to use, copy, modify, and distribute this software and its
+documentation with or without modifications and for any purpose and
+without fee is hereby granted, provided that any copyright notices
+appear in all copies and that both those copyright notices and this
+permission notice appear in supporting documentation, and that the
+names of the contributors or copyright holders not be used in
+advertising or publicity pertaining to distribution of the software
+without specific prior permission.
+
+THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
+WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
+OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+OR PERFORMANCE OF THIS SOFTWARE.
+
Binary file test-data/20.fa.bz2 has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/decompose_result01.vcf	Sat Jun 04 12:45:04 2016 -0400
@@ -0,0 +1,124 @@
+##fileformat=VCFv4.0
+##FILTER=<ID=PASS,Description="All filters passed">
+##contig=<ID=1,length=249250621,assembly=b37>
+##contig=<ID=2,length=243199373,assembly=b37>
+##contig=<ID=3,length=198022430,assembly=b37>
+##contig=<ID=4,length=191154276,assembly=b37>
+##contig=<ID=5,length=180915260,assembly=b37>
+##contig=<ID=6,length=171115067,assembly=b37>
+##contig=<ID=7,length=159138663,assembly=b37>
+##contig=<ID=8,length=146364022,assembly=b37>
+##contig=<ID=9,length=141213431,assembly=b37>
+##contig=<ID=10,length=135534747,assembly=b37>
+##contig=<ID=11,length=135006516,assembly=b37>
+##contig=<ID=12,length=133851895,assembly=b37>
+##contig=<ID=13,length=115169878,assembly=b37>
+##contig=<ID=14,length=107349540,assembly=b37>
+##contig=<ID=15,length=102531392,assembly=b37>
+##contig=<ID=16,length=90354753,assembly=b37>
+##contig=<ID=17,length=81195210,assembly=b37>
+##contig=<ID=18,length=78077248,assembly=b37>
+##contig=<ID=19,length=59128983,assembly=b37>
+##contig=<ID=20,length=63025520,assembly=b37>
+##contig=<ID=21,length=48129895,assembly=b37>
+##contig=<ID=22,length=51304566,assembly=b37>
+##contig=<ID=X,length=155270560,assembly=b37>
+##contig=<ID=Y,length=59373566,assembly=b37>
+##contig=<ID=MT,length=16569,assembly=b37>
+##contig=<ID=GL000207.1,length=4262,assembly=b37>
+##contig=<ID=GL000226.1,length=15008,assembly=b37>
+##contig=<ID=GL000229.1,length=19913,assembly=b37>
+##contig=<ID=GL000231.1,length=27386,assembly=b37>
+##contig=<ID=GL000210.1,length=27682,assembly=b37>
+##contig=<ID=GL000239.1,length=33824,assembly=b37>
+##contig=<ID=GL000235.1,length=34474,assembly=b37>
+##contig=<ID=GL000201.1,length=36148,assembly=b37>
+##contig=<ID=GL000247.1,length=36422,assembly=b37>
+##contig=<ID=GL000245.1,length=36651,assembly=b37>
+##contig=<ID=GL000197.1,length=37175,assembly=b37>
+##contig=<ID=GL000203.1,length=37498,assembly=b37>
+##contig=<ID=GL000246.1,length=38154,assembly=b37>
+##contig=<ID=GL000249.1,length=38502,assembly=b37>
+##contig=<ID=GL000196.1,length=38914,assembly=b37>
+##contig=<ID=GL000248.1,length=39786,assembly=b37>
+##contig=<ID=GL000244.1,length=39929,assembly=b37>
+##contig=<ID=GL000238.1,length=39939,assembly=b37>
+##contig=<ID=GL000202.1,length=40103,assembly=b37>
+##contig=<ID=GL000234.1,length=40531,assembly=b37>
+##contig=<ID=GL000232.1,length=40652,assembly=b37>
+##contig=<ID=GL000206.1,length=41001,assembly=b37>
+##contig=<ID=GL000240.1,length=41933,assembly=b37>
+##contig=<ID=GL000236.1,length=41934,assembly=b37>
+##contig=<ID=GL000241.1,length=42152,assembly=b37>
+##contig=<ID=GL000243.1,length=43341,assembly=b37>
+##contig=<ID=GL000242.1,length=43523,assembly=b37>
+##contig=<ID=GL000230.1,length=43691,assembly=b37>
+##contig=<ID=GL000237.1,length=45867,assembly=b37>
+##contig=<ID=GL000233.1,length=45941,assembly=b37>
+##contig=<ID=GL000204.1,length=81310,assembly=b37>
+##contig=<ID=GL000198.1,length=90085,assembly=b37>
+##contig=<ID=GL000208.1,length=92689,assembly=b37>
+##contig=<ID=GL000191.1,length=106433,assembly=b37>
+##contig=<ID=GL000227.1,length=128374,assembly=b37>
+##contig=<ID=GL000228.1,length=129120,assembly=b37>
+##contig=<ID=GL000214.1,length=137718,assembly=b37>
+##contig=<ID=GL000221.1,length=155397,assembly=b37>
+##contig=<ID=GL000209.1,length=159169,assembly=b37>
+##contig=<ID=GL000218.1,length=161147,assembly=b37>
+##contig=<ID=GL000220.1,length=161802,assembly=b37>
+##contig=<ID=GL000213.1,length=164239,assembly=b37>
+##contig=<ID=GL000211.1,length=166566,assembly=b37>
+##contig=<ID=GL000199.1,length=169874,assembly=b37>
+##contig=<ID=GL000217.1,length=172149,assembly=b37>
+##contig=<ID=GL000216.1,length=172294,assembly=b37>
+##contig=<ID=GL000215.1,length=172545,assembly=b37>
+##contig=<ID=GL000205.1,length=174588,assembly=b37>
+##contig=<ID=GL000219.1,length=179198,assembly=b37>
+##contig=<ID=GL000224.1,length=179693,assembly=b37>
+##contig=<ID=GL000223.1,length=180455,assembly=b37>
+##contig=<ID=GL000195.1,length=182896,assembly=b37>
+##contig=<ID=GL000212.1,length=186858,assembly=b37>
+##contig=<ID=GL000222.1,length=186861,assembly=b37>
+##contig=<ID=GL000200.1,length=187035,assembly=b37>
+##contig=<ID=GL000193.1,length=189789,assembly=b37>
+##contig=<ID=GL000194.1,length=191469,assembly=b37>
+##contig=<ID=GL000225.1,length=211173,assembly=b37>
+##contig=<ID=GL000192.1,length=547496,assembly=b37>
+##FilterLiftedVariants="analysis_type=FilterLiftedVariants input_file=[] sample_metadata=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null reference_sequence=/local/sequence/reference/BWA_ref/hg19/hg19.fasta rodBind=[/local/scratch/xyliu/0.915136538286792.sorted.vcf] rodToIntervalTrackName=null BTI_merge_rule=UNION DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false enable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null quiet_output_mode=false debug_mode=false help=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub"
+##INFO=<ID=VC,Number=1,Type=String,Description="Variation Class">
+##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
+##INFO=<ID=refseq.name,Number=1,Type=String,Description="RefSeq name">
+##INFO=<ID=refseq.positionType,Number=1,Type=String,Description="RefSeq genome type position">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##INFO=<ID=OLD_MULTIALLELIC,Number=1,Type=String,Description="Original chr:pos:ref:alt encoding">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
+20	421808	.	A	ACCA	.	PASS	VC=INDEL;AC=24;AF=0.08;AN=316;refseq.name=NM_144628;refseq.positionType=intron
+20	1292033	.	C	CTTGT	.	PASS	VC=INDEL;AC=28;AF=0.1;AN=276;refseq.name=NM_080489;refseq.positionType=intron
+20	1340527	.	T	TGTC	.	PASS	VC=INDEL;AC=56;AF=0.18;AN=316
+20	1600125	.	GAA	G	.	PASS	VC=INDEL;AC=3;AF=0.01;AN=312;refseq.name=NM_001083910;refseq.positionType=intron
+20	1728298	.	G	GT	.	PASS	VC=INDEL;AC=199;AF=0.63;AN=316
+20	2171402	.	T	TA	.	PASS	VC=INDEL;AC=223;AF=0.71;AN=316
+20	2171404	.	A	AA	.	PASS	VC=INDEL;AC=223;AF=0.71;AN=316
+20	2982245	.	CT	C	.	PASS	VC=INDEL;AC=118;AF=0.38;AN=314;refseq.name=NM_002836;refseq.positionType=intron
+20	3025866	.	TCAAA	T	.	PASS	VC=INDEL;AC=31;AF=0.1;AN=306;refseq.name=NM_001501;refseq.positionType=intron
+20	3373441	.	TCTTT	T	.	PASS	VC=INDEL;AC=58;AF=0.18;AN=316;refseq.name=NM_001009984;refseq.positionType=intron
+20	3635159	.	T	TT	.	PASS	VC=INDEL;AC=130;AF=0.47;AN=274
+20	4422119	.	GCTCCCAGGCTACAGAAAGATGATGGAG	G	.	PASS	VC=INDEL;AC=174;AF=0.55;AN=314
+20	5151108	.	GTTCT	G	.	PASS	VC=INDEL;AC=61;AF=0.2;AN=304;refseq.name=NM_003818;refseq.positionType=intron
+20	5280839	.	T	TATA	.	PASS	VC=INDEL;AC=202;AF=0.75;AN=268
+20	5291223	.	TCAG	T	.	PASS	VC=INDEL;AC=51;AF=0.16;AN=316;refseq.name=NM_144773;refseq.positionType=intron
+20	5509358	.	T	TG	.	PASS	VC=INDEL;AC=136;AF=0.43;AN=316
+20	5900669	.	G	GC	.	PASS	VC=INDEL;AC=69;AF=0.23;AN=300;refseq.name=NM_001819;refseq.positionType=intron
+20	5900670	.	C	CC	.	PASS	VC=INDEL;AC=68;AF=0.22;AN=304;refseq.name=NM_001819;refseq.positionType=intron
+20	6351757	.	C	CTT	.	PASS	VC=INDEL;AC=52;AF=0.17;AN=314
+20	6362163	.	GC	G	.	PASS	VC=INDEL;AC=49;AF=0.2;AN=250
+20	6481086	.	T	TTGTC	.	PASS	VC=INDEL;AC=307;AF=0.97;AN=316
+20	8080280	.	GTTTG	G	.	PASS	VC=INDEL;AC=118;AF=0.37;AN=316
+20	8781394	.	AA	A	.	PASS	VC=INDEL;AC=122;AF=0.39;AN=316;refseq.name=NM_015192;refseq.positionType=intron
+20	8833756	.	TT	T	.	PASS	VC=INDEL;AC=13;AF=0.04;AN=298;refseq.name=NM_015192;refseq.positionType=intron
+20	9035330	.	T	TT	.	PASS	VC=INDEL;AC=110;AF=0.35;AN=314
+20	9311904	.	TGTATCTGTCCA	T	.	PASS	VC=INDEL;AC=46;AF=0.15;AN=314;refseq.name=NM_000933;refseq.positionType=intron
+20	9389232	.	GGGTTTGAT	G	.	PASS	VC=INDEL;AC=77;AF=0.25;AN=306;refseq.name=NM_000933;refseq.positionType=intron
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/decompose_result02.vcf	Sat Jun 04 12:45:04 2016 -0400
@@ -0,0 +1,100 @@
+##fileformat=VCFv4.0
+##FILTER=<ID=PASS,Description="All filters passed">
+##contig=<ID=1,length=249250621,assembly=b37>
+##contig=<ID=2,length=243199373,assembly=b37>
+##contig=<ID=3,length=198022430,assembly=b37>
+##contig=<ID=4,length=191154276,assembly=b37>
+##contig=<ID=5,length=180915260,assembly=b37>
+##contig=<ID=6,length=171115067,assembly=b37>
+##contig=<ID=7,length=159138663,assembly=b37>
+##contig=<ID=8,length=146364022,assembly=b37>
+##contig=<ID=9,length=141213431,assembly=b37>
+##contig=<ID=10,length=135534747,assembly=b37>
+##contig=<ID=11,length=135006516,assembly=b37>
+##contig=<ID=12,length=133851895,assembly=b37>
+##contig=<ID=13,length=115169878,assembly=b37>
+##contig=<ID=14,length=107349540,assembly=b37>
+##contig=<ID=15,length=102531392,assembly=b37>
+##contig=<ID=16,length=90354753,assembly=b37>
+##contig=<ID=17,length=81195210,assembly=b37>
+##contig=<ID=18,length=78077248,assembly=b37>
+##contig=<ID=19,length=59128983,assembly=b37>
+##contig=<ID=20,length=63025520,assembly=b37>
+##contig=<ID=21,length=48129895,assembly=b37>
+##contig=<ID=22,length=51304566,assembly=b37>
+##contig=<ID=X,length=155270560,assembly=b37>
+##contig=<ID=Y,length=59373566,assembly=b37>
+##contig=<ID=MT,length=16569,assembly=b37>
+##contig=<ID=GL000207.1,length=4262,assembly=b37>
+##contig=<ID=GL000226.1,length=15008,assembly=b37>
+##contig=<ID=GL000229.1,length=19913,assembly=b37>
+##contig=<ID=GL000231.1,length=27386,assembly=b37>
+##contig=<ID=GL000210.1,length=27682,assembly=b37>
+##contig=<ID=GL000239.1,length=33824,assembly=b37>
+##contig=<ID=GL000235.1,length=34474,assembly=b37>
+##contig=<ID=GL000201.1,length=36148,assembly=b37>
+##contig=<ID=GL000247.1,length=36422,assembly=b37>
+##contig=<ID=GL000245.1,length=36651,assembly=b37>
+##contig=<ID=GL000197.1,length=37175,assembly=b37>
+##contig=<ID=GL000203.1,length=37498,assembly=b37>
+##contig=<ID=GL000246.1,length=38154,assembly=b37>
+##contig=<ID=GL000249.1,length=38502,assembly=b37>
+##contig=<ID=GL000196.1,length=38914,assembly=b37>
+##contig=<ID=GL000248.1,length=39786,assembly=b37>
+##contig=<ID=GL000244.1,length=39929,assembly=b37>
+##contig=<ID=GL000238.1,length=39939,assembly=b37>
+##contig=<ID=GL000202.1,length=40103,assembly=b37>
+##contig=<ID=GL000234.1,length=40531,assembly=b37>
+##contig=<ID=GL000232.1,length=40652,assembly=b37>
+##contig=<ID=GL000206.1,length=41001,assembly=b37>
+##contig=<ID=GL000240.1,length=41933,assembly=b37>
+##contig=<ID=GL000236.1,length=41934,assembly=b37>
+##contig=<ID=GL000241.1,length=42152,assembly=b37>
+##contig=<ID=GL000243.1,length=43341,assembly=b37>
+##contig=<ID=GL000242.1,length=43523,assembly=b37>
+##contig=<ID=GL000230.1,length=43691,assembly=b37>
+##contig=<ID=GL000237.1,length=45867,assembly=b37>
+##contig=<ID=GL000233.1,length=45941,assembly=b37>
+##contig=<ID=GL000204.1,length=81310,assembly=b37>
+##contig=<ID=GL000198.1,length=90085,assembly=b37>
+##contig=<ID=GL000208.1,length=92689,assembly=b37>
+##contig=<ID=GL000191.1,length=106433,assembly=b37>
+##contig=<ID=GL000227.1,length=128374,assembly=b37>
+##contig=<ID=GL000228.1,length=129120,assembly=b37>
+##contig=<ID=GL000214.1,length=137718,assembly=b37>
+##contig=<ID=GL000221.1,length=155397,assembly=b37>
+##contig=<ID=GL000209.1,length=159169,assembly=b37>
+##contig=<ID=GL000218.1,length=161147,assembly=b37>
+##contig=<ID=GL000220.1,length=161802,assembly=b37>
+##contig=<ID=GL000213.1,length=164239,assembly=b37>
+##contig=<ID=GL000211.1,length=166566,assembly=b37>
+##contig=<ID=GL000199.1,length=169874,assembly=b37>
+##contig=<ID=GL000217.1,length=172149,assembly=b37>
+##contig=<ID=GL000216.1,length=172294,assembly=b37>
+##contig=<ID=GL000215.1,length=172545,assembly=b37>
+##contig=<ID=GL000205.1,length=174588,assembly=b37>
+##contig=<ID=GL000219.1,length=179198,assembly=b37>
+##contig=<ID=GL000224.1,length=179693,assembly=b37>
+##contig=<ID=GL000223.1,length=180455,assembly=b37>
+##contig=<ID=GL000195.1,length=182896,assembly=b37>
+##contig=<ID=GL000212.1,length=186858,assembly=b37>
+##contig=<ID=GL000222.1,length=186861,assembly=b37>
+##contig=<ID=GL000200.1,length=187035,assembly=b37>
+##contig=<ID=GL000193.1,length=189789,assembly=b37>
+##contig=<ID=GL000194.1,length=191469,assembly=b37>
+##contig=<ID=GL000225.1,length=211173,assembly=b37>
+##contig=<ID=GL000192.1,length=547496,assembly=b37>
+##FilterLiftedVariants="analysis_type=FilterLiftedVariants input_file=[] sample_metadata=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null reference_sequence=/local/sequence/reference/BWA_ref/hg19/hg19.fasta rodBind=[/local/scratch/xyliu/0.915136538286792.sorted.vcf] rodToIntervalTrackName=null BTI_merge_rule=UNION DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false enable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null quiet_output_mode=false debug_mode=false help=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub"
+##INFO=<ID=VC,Number=1,Type=String,Description="Variation Class">
+##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
+##INFO=<ID=refseq.name,Number=1,Type=String,Description="RefSeq name">
+##INFO=<ID=refseq.positionType,Number=1,Type=String,Description="RefSeq genome type position">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##INFO=<ID=OLD_MULTIALLELIC,Number=1,Type=String,Description="Original chr:pos:ref:alt encoding">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
+1	3759889	.	TA	TAA	.	PASS	AF=0.342,0.173,0.037;OLD_MULTIALLELIC=1:3759889:TA/TAA/TAAA/T
+1	3759889	.	TA	TAAA	.	PASS	AF=0.342,0.173,0.037;OLD_MULTIALLELIC=1:3759889:TA/TAA/TAAA/T
+1	3759889	.	TA	T	.	PASS	AF=0.342,0.173,0.037;OLD_MULTIALLELIC=1:3759889:TA/TAA/TAAA/T
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/infile01.vcf	Sat Jun 04 12:45:04 2016 -0400
@@ -0,0 +1,123 @@
+##fileformat=VCFv4.0
+##FILTER=<ID=PASS,Description="All filters passed">
+##contig=<ID=1,length=249250621,assembly=b37>
+##contig=<ID=2,length=243199373,assembly=b37>
+##contig=<ID=3,length=198022430,assembly=b37>
+##contig=<ID=4,length=191154276,assembly=b37>
+##contig=<ID=5,length=180915260,assembly=b37>
+##contig=<ID=6,length=171115067,assembly=b37>
+##contig=<ID=7,length=159138663,assembly=b37>
+##contig=<ID=8,length=146364022,assembly=b37>
+##contig=<ID=9,length=141213431,assembly=b37>
+##contig=<ID=10,length=135534747,assembly=b37>
+##contig=<ID=11,length=135006516,assembly=b37>
+##contig=<ID=12,length=133851895,assembly=b37>
+##contig=<ID=13,length=115169878,assembly=b37>
+##contig=<ID=14,length=107349540,assembly=b37>
+##contig=<ID=15,length=102531392,assembly=b37>
+##contig=<ID=16,length=90354753,assembly=b37>
+##contig=<ID=17,length=81195210,assembly=b37>
+##contig=<ID=18,length=78077248,assembly=b37>
+##contig=<ID=19,length=59128983,assembly=b37>
+##contig=<ID=20,length=63025520,assembly=b37>
+##contig=<ID=21,length=48129895,assembly=b37>
+##contig=<ID=22,length=51304566,assembly=b37>
+##contig=<ID=X,length=155270560,assembly=b37>
+##contig=<ID=Y,length=59373566,assembly=b37>
+##contig=<ID=MT,length=16569,assembly=b37>
+##contig=<ID=GL000207.1,length=4262,assembly=b37>
+##contig=<ID=GL000226.1,length=15008,assembly=b37>
+##contig=<ID=GL000229.1,length=19913,assembly=b37>
+##contig=<ID=GL000231.1,length=27386,assembly=b37>
+##contig=<ID=GL000210.1,length=27682,assembly=b37>
+##contig=<ID=GL000239.1,length=33824,assembly=b37>
+##contig=<ID=GL000235.1,length=34474,assembly=b37>
+##contig=<ID=GL000201.1,length=36148,assembly=b37>
+##contig=<ID=GL000247.1,length=36422,assembly=b37>
+##contig=<ID=GL000245.1,length=36651,assembly=b37>
+##contig=<ID=GL000197.1,length=37175,assembly=b37>
+##contig=<ID=GL000203.1,length=37498,assembly=b37>
+##contig=<ID=GL000246.1,length=38154,assembly=b37>
+##contig=<ID=GL000249.1,length=38502,assembly=b37>
+##contig=<ID=GL000196.1,length=38914,assembly=b37>
+##contig=<ID=GL000248.1,length=39786,assembly=b37>
+##contig=<ID=GL000244.1,length=39929,assembly=b37>
+##contig=<ID=GL000238.1,length=39939,assembly=b37>
+##contig=<ID=GL000202.1,length=40103,assembly=b37>
+##contig=<ID=GL000234.1,length=40531,assembly=b37>
+##contig=<ID=GL000232.1,length=40652,assembly=b37>
+##contig=<ID=GL000206.1,length=41001,assembly=b37>
+##contig=<ID=GL000240.1,length=41933,assembly=b37>
+##contig=<ID=GL000236.1,length=41934,assembly=b37>
+##contig=<ID=GL000241.1,length=42152,assembly=b37>
+##contig=<ID=GL000243.1,length=43341,assembly=b37>
+##contig=<ID=GL000242.1,length=43523,assembly=b37>
+##contig=<ID=GL000230.1,length=43691,assembly=b37>
+##contig=<ID=GL000237.1,length=45867,assembly=b37>
+##contig=<ID=GL000233.1,length=45941,assembly=b37>
+##contig=<ID=GL000204.1,length=81310,assembly=b37>
+##contig=<ID=GL000198.1,length=90085,assembly=b37>
+##contig=<ID=GL000208.1,length=92689,assembly=b37>
+##contig=<ID=GL000191.1,length=106433,assembly=b37>
+##contig=<ID=GL000227.1,length=128374,assembly=b37>
+##contig=<ID=GL000228.1,length=129120,assembly=b37>
+##contig=<ID=GL000214.1,length=137718,assembly=b37>
+##contig=<ID=GL000221.1,length=155397,assembly=b37>
+##contig=<ID=GL000209.1,length=159169,assembly=b37>
+##contig=<ID=GL000218.1,length=161147,assembly=b37>
+##contig=<ID=GL000220.1,length=161802,assembly=b37>
+##contig=<ID=GL000213.1,length=164239,assembly=b37>
+##contig=<ID=GL000211.1,length=166566,assembly=b37>
+##contig=<ID=GL000199.1,length=169874,assembly=b37>
+##contig=<ID=GL000217.1,length=172149,assembly=b37>
+##contig=<ID=GL000216.1,length=172294,assembly=b37>
+##contig=<ID=GL000215.1,length=172545,assembly=b37>
+##contig=<ID=GL000205.1,length=174588,assembly=b37>
+##contig=<ID=GL000219.1,length=179198,assembly=b37>
+##contig=<ID=GL000224.1,length=179693,assembly=b37>
+##contig=<ID=GL000223.1,length=180455,assembly=b37>
+##contig=<ID=GL000195.1,length=182896,assembly=b37>
+##contig=<ID=GL000212.1,length=186858,assembly=b37>
+##contig=<ID=GL000222.1,length=186861,assembly=b37>
+##contig=<ID=GL000200.1,length=187035,assembly=b37>
+##contig=<ID=GL000193.1,length=189789,assembly=b37>
+##contig=<ID=GL000194.1,length=191469,assembly=b37>
+##contig=<ID=GL000225.1,length=211173,assembly=b37>
+##contig=<ID=GL000192.1,length=547496,assembly=b37>
+##FilterLiftedVariants="analysis_type=FilterLiftedVariants input_file=[] sample_metadata=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null reference_sequence=/local/sequence/reference/BWA_ref/hg19/hg19.fasta rodBind=[/local/scratch/xyliu/0.915136538286792.sorted.vcf] rodToIntervalTrackName=null BTI_merge_rule=UNION DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false enable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null quiet_output_mode=false debug_mode=false help=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub"
+##INFO=<ID=VC,Number=1,Type=String,Description="Variation Class">
+##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
+##INFO=<ID=refseq.name,Number=1,Type=String,Description="RefSeq name">
+##INFO=<ID=refseq.positionType,Number=1,Type=String,Description="RefSeq genome type position">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
+20	421808	.	A	ACCA	.	PASS	VC=INDEL;AC=24;AF=0.08;AN=316;refseq.name=NM_144628;refseq.positionType=intron
+20	1292033	.	C	CTTGT	.	PASS	VC=INDEL;AC=28;AF=0.1;AN=276;refseq.name=NM_080489;refseq.positionType=intron
+20	1340527	.	T	TGTC	.	PASS	VC=INDEL;AC=56;AF=0.18;AN=316
+20	1600125	.	GAA	G	.	PASS	VC=INDEL;AC=3;AF=0.01;AN=312;refseq.name=NM_001083910;refseq.positionType=intron
+20	1728298	.	G	GT	.	PASS	VC=INDEL;AC=199;AF=0.63;AN=316
+20	2171402	.	T	TA	.	PASS	VC=INDEL;AC=223;AF=0.71;AN=316
+20	2171404	.	A	AA	.	PASS	VC=INDEL;AC=223;AF=0.71;AN=316
+20	2982245	.	CT	C	.	PASS	VC=INDEL;AC=118;AF=0.38;AN=314;refseq.name=NM_002836;refseq.positionType=intron
+20	3025866	.	TCAAA	T	.	PASS	VC=INDEL;AC=31;AF=0.1;AN=306;refseq.name=NM_001501;refseq.positionType=intron
+20	3373441	.	TCTTT	T	.	PASS	VC=INDEL;AC=58;AF=0.18;AN=316;refseq.name=NM_001009984;refseq.positionType=intron
+20	3635159	.	T	TT	.	PASS	VC=INDEL;AC=130;AF=0.47;AN=274
+20	4422119	.	GCTCCCAGGCTACAGAAAGATGATGGAG	G	.	PASS	VC=INDEL;AC=174;AF=0.55;AN=314
+20	5151108	.	GTTCT	G	.	PASS	VC=INDEL;AC=61;AF=0.2;AN=304;refseq.name=NM_003818;refseq.positionType=intron
+20	5280839	.	T	TATA	.	PASS	VC=INDEL;AC=202;AF=0.75;AN=268
+20	5291223	.	TCAG	T	.	PASS	VC=INDEL;AC=51;AF=0.16;AN=316;refseq.name=NM_144773;refseq.positionType=intron
+20	5509358	.	T	TG	.	PASS	VC=INDEL;AC=136;AF=0.43;AN=316
+20	5900669	.	G	GC	.	PASS	VC=INDEL;AC=69;AF=0.23;AN=300;refseq.name=NM_001819;refseq.positionType=intron
+20	5900670	.	C	CC	.	PASS	VC=INDEL;AC=68;AF=0.22;AN=304;refseq.name=NM_001819;refseq.positionType=intron
+20	6351757	.	C	CTT	.	PASS	VC=INDEL;AC=52;AF=0.17;AN=314
+20	6362163	.	GC	G	.	PASS	VC=INDEL;AC=49;AF=0.2;AN=250
+20	6481086	.	T	TTGTC	.	PASS	VC=INDEL;AC=307;AF=0.97;AN=316
+20	8080280	.	GTTTG	G	.	PASS	VC=INDEL;AC=118;AF=0.37;AN=316
+20	8781394	.	AA	A	.	PASS	VC=INDEL;AC=122;AF=0.39;AN=316;refseq.name=NM_015192;refseq.positionType=intron
+20	8833756	.	TT	T	.	PASS	VC=INDEL;AC=13;AF=0.04;AN=298;refseq.name=NM_015192;refseq.positionType=intron
+20	9035330	.	T	TT	.	PASS	VC=INDEL;AC=110;AF=0.35;AN=314
+20	9311904	.	TGTATCTGTCCA	T	.	PASS	VC=INDEL;AC=46;AF=0.15;AN=314;refseq.name=NM_000933;refseq.positionType=intron
+20	9389232	.	GGGTTTGAT	G	.	PASS	VC=INDEL;AC=77;AF=0.25;AN=306;refseq.name=NM_000933;refseq.positionType=intron
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/infile02.vcf	Sat Jun 04 12:45:04 2016 -0400
@@ -0,0 +1,98 @@
+##fileformat=VCFv4.0
+##FILTER=<ID=PASS,Description="All filters passed">
+##contig=<ID=1,length=249250621,assembly=b37>
+##contig=<ID=2,length=243199373,assembly=b37>
+##contig=<ID=3,length=198022430,assembly=b37>
+##contig=<ID=4,length=191154276,assembly=b37>
+##contig=<ID=5,length=180915260,assembly=b37>
+##contig=<ID=6,length=171115067,assembly=b37>
+##contig=<ID=7,length=159138663,assembly=b37>
+##contig=<ID=8,length=146364022,assembly=b37>
+##contig=<ID=9,length=141213431,assembly=b37>
+##contig=<ID=10,length=135534747,assembly=b37>
+##contig=<ID=11,length=135006516,assembly=b37>
+##contig=<ID=12,length=133851895,assembly=b37>
+##contig=<ID=13,length=115169878,assembly=b37>
+##contig=<ID=14,length=107349540,assembly=b37>
+##contig=<ID=15,length=102531392,assembly=b37>
+##contig=<ID=16,length=90354753,assembly=b37>
+##contig=<ID=17,length=81195210,assembly=b37>
+##contig=<ID=18,length=78077248,assembly=b37>
+##contig=<ID=19,length=59128983,assembly=b37>
+##contig=<ID=20,length=63025520,assembly=b37>
+##contig=<ID=21,length=48129895,assembly=b37>
+##contig=<ID=22,length=51304566,assembly=b37>
+##contig=<ID=X,length=155270560,assembly=b37>
+##contig=<ID=Y,length=59373566,assembly=b37>
+##contig=<ID=MT,length=16569,assembly=b37>
+##contig=<ID=GL000207.1,length=4262,assembly=b37>
+##contig=<ID=GL000226.1,length=15008,assembly=b37>
+##contig=<ID=GL000229.1,length=19913,assembly=b37>
+##contig=<ID=GL000231.1,length=27386,assembly=b37>
+##contig=<ID=GL000210.1,length=27682,assembly=b37>
+##contig=<ID=GL000239.1,length=33824,assembly=b37>
+##contig=<ID=GL000235.1,length=34474,assembly=b37>
+##contig=<ID=GL000201.1,length=36148,assembly=b37>
+##contig=<ID=GL000247.1,length=36422,assembly=b37>
+##contig=<ID=GL000245.1,length=36651,assembly=b37>
+##contig=<ID=GL000197.1,length=37175,assembly=b37>
+##contig=<ID=GL000203.1,length=37498,assembly=b37>
+##contig=<ID=GL000246.1,length=38154,assembly=b37>
+##contig=<ID=GL000249.1,length=38502,assembly=b37>
+##contig=<ID=GL000196.1,length=38914,assembly=b37>
+##contig=<ID=GL000248.1,length=39786,assembly=b37>
+##contig=<ID=GL000244.1,length=39929,assembly=b37>
+##contig=<ID=GL000238.1,length=39939,assembly=b37>
+##contig=<ID=GL000202.1,length=40103,assembly=b37>
+##contig=<ID=GL000234.1,length=40531,assembly=b37>
+##contig=<ID=GL000232.1,length=40652,assembly=b37>
+##contig=<ID=GL000206.1,length=41001,assembly=b37>
+##contig=<ID=GL000240.1,length=41933,assembly=b37>
+##contig=<ID=GL000236.1,length=41934,assembly=b37>
+##contig=<ID=GL000241.1,length=42152,assembly=b37>
+##contig=<ID=GL000243.1,length=43341,assembly=b37>
+##contig=<ID=GL000242.1,length=43523,assembly=b37>
+##contig=<ID=GL000230.1,length=43691,assembly=b37>
+##contig=<ID=GL000237.1,length=45867,assembly=b37>
+##contig=<ID=GL000233.1,length=45941,assembly=b37>
+##contig=<ID=GL000204.1,length=81310,assembly=b37>
+##contig=<ID=GL000198.1,length=90085,assembly=b37>
+##contig=<ID=GL000208.1,length=92689,assembly=b37>
+##contig=<ID=GL000191.1,length=106433,assembly=b37>
+##contig=<ID=GL000227.1,length=128374,assembly=b37>
+##contig=<ID=GL000228.1,length=129120,assembly=b37>
+##contig=<ID=GL000214.1,length=137718,assembly=b37>
+##contig=<ID=GL000221.1,length=155397,assembly=b37>
+##contig=<ID=GL000209.1,length=159169,assembly=b37>
+##contig=<ID=GL000218.1,length=161147,assembly=b37>
+##contig=<ID=GL000220.1,length=161802,assembly=b37>
+##contig=<ID=GL000213.1,length=164239,assembly=b37>
+##contig=<ID=GL000211.1,length=166566,assembly=b37>
+##contig=<ID=GL000199.1,length=169874,assembly=b37>
+##contig=<ID=GL000217.1,length=172149,assembly=b37>
+##contig=<ID=GL000216.1,length=172294,assembly=b37>
+##contig=<ID=GL000215.1,length=172545,assembly=b37>
+##contig=<ID=GL000205.1,length=174588,assembly=b37>
+##contig=<ID=GL000219.1,length=179198,assembly=b37>
+##contig=<ID=GL000224.1,length=179693,assembly=b37>
+##contig=<ID=GL000223.1,length=180455,assembly=b37>
+##contig=<ID=GL000195.1,length=182896,assembly=b37>
+##contig=<ID=GL000212.1,length=186858,assembly=b37>
+##contig=<ID=GL000222.1,length=186861,assembly=b37>
+##contig=<ID=GL000200.1,length=187035,assembly=b37>
+##contig=<ID=GL000193.1,length=189789,assembly=b37>
+##contig=<ID=GL000194.1,length=191469,assembly=b37>
+##contig=<ID=GL000225.1,length=211173,assembly=b37>
+##contig=<ID=GL000192.1,length=547496,assembly=b37>
+##FilterLiftedVariants="analysis_type=FilterLiftedVariants input_file=[] sample_metadata=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null reference_sequence=/local/sequence/reference/BWA_ref/hg19/hg19.fasta rodBind=[/local/scratch/xyliu/0.915136538286792.sorted.vcf] rodToIntervalTrackName=null BTI_merge_rule=UNION DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false enable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null quiet_output_mode=false debug_mode=false help=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub"
+##INFO=<ID=VC,Number=1,Type=String,Description="Variation Class">
+##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
+##INFO=<ID=refseq.name,Number=1,Type=String,Description="RefSeq name">
+##INFO=<ID=refseq.positionType,Number=1,Type=String,Description="RefSeq genome type position">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##INFO=<ID=OLD_MULTIALLELIC,Number=1,Type=String,Description="Original chr:pos:ref:alt encoding">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
+1	3759889	.	TA	TAA,TAAA,T	.	PASS	AF=0.342,0.173,0.037	GT:DP:PL	1/2:81:281,5,9,58,0,115,338,46,116,809	0/0:86:0,30,323,31,365,483,38,291,325,567
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/normalize_result01.vcf	Sat Jun 04 12:45:04 2016 -0400
@@ -0,0 +1,124 @@
+##fileformat=VCFv4.0
+##FILTER=<ID=PASS,Description="All filters passed">
+##contig=<ID=1,length=249250621,assembly=b37>
+##contig=<ID=2,length=243199373,assembly=b37>
+##contig=<ID=3,length=198022430,assembly=b37>
+##contig=<ID=4,length=191154276,assembly=b37>
+##contig=<ID=5,length=180915260,assembly=b37>
+##contig=<ID=6,length=171115067,assembly=b37>
+##contig=<ID=7,length=159138663,assembly=b37>
+##contig=<ID=8,length=146364022,assembly=b37>
+##contig=<ID=9,length=141213431,assembly=b37>
+##contig=<ID=10,length=135534747,assembly=b37>
+##contig=<ID=11,length=135006516,assembly=b37>
+##contig=<ID=12,length=133851895,assembly=b37>
+##contig=<ID=13,length=115169878,assembly=b37>
+##contig=<ID=14,length=107349540,assembly=b37>
+##contig=<ID=15,length=102531392,assembly=b37>
+##contig=<ID=16,length=90354753,assembly=b37>
+##contig=<ID=17,length=81195210,assembly=b37>
+##contig=<ID=18,length=78077248,assembly=b37>
+##contig=<ID=19,length=59128983,assembly=b37>
+##contig=<ID=20,length=63025520,assembly=b37>
+##contig=<ID=21,length=48129895,assembly=b37>
+##contig=<ID=22,length=51304566,assembly=b37>
+##contig=<ID=X,length=155270560,assembly=b37>
+##contig=<ID=Y,length=59373566,assembly=b37>
+##contig=<ID=MT,length=16569,assembly=b37>
+##contig=<ID=GL000207.1,length=4262,assembly=b37>
+##contig=<ID=GL000226.1,length=15008,assembly=b37>
+##contig=<ID=GL000229.1,length=19913,assembly=b37>
+##contig=<ID=GL000231.1,length=27386,assembly=b37>
+##contig=<ID=GL000210.1,length=27682,assembly=b37>
+##contig=<ID=GL000239.1,length=33824,assembly=b37>
+##contig=<ID=GL000235.1,length=34474,assembly=b37>
+##contig=<ID=GL000201.1,length=36148,assembly=b37>
+##contig=<ID=GL000247.1,length=36422,assembly=b37>
+##contig=<ID=GL000245.1,length=36651,assembly=b37>
+##contig=<ID=GL000197.1,length=37175,assembly=b37>
+##contig=<ID=GL000203.1,length=37498,assembly=b37>
+##contig=<ID=GL000246.1,length=38154,assembly=b37>
+##contig=<ID=GL000249.1,length=38502,assembly=b37>
+##contig=<ID=GL000196.1,length=38914,assembly=b37>
+##contig=<ID=GL000248.1,length=39786,assembly=b37>
+##contig=<ID=GL000244.1,length=39929,assembly=b37>
+##contig=<ID=GL000238.1,length=39939,assembly=b37>
+##contig=<ID=GL000202.1,length=40103,assembly=b37>
+##contig=<ID=GL000234.1,length=40531,assembly=b37>
+##contig=<ID=GL000232.1,length=40652,assembly=b37>
+##contig=<ID=GL000206.1,length=41001,assembly=b37>
+##contig=<ID=GL000240.1,length=41933,assembly=b37>
+##contig=<ID=GL000236.1,length=41934,assembly=b37>
+##contig=<ID=GL000241.1,length=42152,assembly=b37>
+##contig=<ID=GL000243.1,length=43341,assembly=b37>
+##contig=<ID=GL000242.1,length=43523,assembly=b37>
+##contig=<ID=GL000230.1,length=43691,assembly=b37>
+##contig=<ID=GL000237.1,length=45867,assembly=b37>
+##contig=<ID=GL000233.1,length=45941,assembly=b37>
+##contig=<ID=GL000204.1,length=81310,assembly=b37>
+##contig=<ID=GL000198.1,length=90085,assembly=b37>
+##contig=<ID=GL000208.1,length=92689,assembly=b37>
+##contig=<ID=GL000191.1,length=106433,assembly=b37>
+##contig=<ID=GL000227.1,length=128374,assembly=b37>
+##contig=<ID=GL000228.1,length=129120,assembly=b37>
+##contig=<ID=GL000214.1,length=137718,assembly=b37>
+##contig=<ID=GL000221.1,length=155397,assembly=b37>
+##contig=<ID=GL000209.1,length=159169,assembly=b37>
+##contig=<ID=GL000218.1,length=161147,assembly=b37>
+##contig=<ID=GL000220.1,length=161802,assembly=b37>
+##contig=<ID=GL000213.1,length=164239,assembly=b37>
+##contig=<ID=GL000211.1,length=166566,assembly=b37>
+##contig=<ID=GL000199.1,length=169874,assembly=b37>
+##contig=<ID=GL000217.1,length=172149,assembly=b37>
+##contig=<ID=GL000216.1,length=172294,assembly=b37>
+##contig=<ID=GL000215.1,length=172545,assembly=b37>
+##contig=<ID=GL000205.1,length=174588,assembly=b37>
+##contig=<ID=GL000219.1,length=179198,assembly=b37>
+##contig=<ID=GL000224.1,length=179693,assembly=b37>
+##contig=<ID=GL000223.1,length=180455,assembly=b37>
+##contig=<ID=GL000195.1,length=182896,assembly=b37>
+##contig=<ID=GL000212.1,length=186858,assembly=b37>
+##contig=<ID=GL000222.1,length=186861,assembly=b37>
+##contig=<ID=GL000200.1,length=187035,assembly=b37>
+##contig=<ID=GL000193.1,length=189789,assembly=b37>
+##contig=<ID=GL000194.1,length=191469,assembly=b37>
+##contig=<ID=GL000225.1,length=211173,assembly=b37>
+##contig=<ID=GL000192.1,length=547496,assembly=b37>
+##FilterLiftedVariants="analysis_type=FilterLiftedVariants input_file=[] sample_metadata=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null reference_sequence=/local/sequence/reference/BWA_ref/hg19/hg19.fasta rodBind=[/local/scratch/xyliu/0.915136538286792.sorted.vcf] rodToIntervalTrackName=null BTI_merge_rule=UNION DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false enable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null quiet_output_mode=false debug_mode=false help=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub"
+##INFO=<ID=VC,Number=1,Type=String,Description="Variation Class">
+##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
+##INFO=<ID=refseq.name,Number=1,Type=String,Description="RefSeq name">
+##INFO=<ID=refseq.positionType,Number=1,Type=String,Description="RefSeq genome type position">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##INFO=<ID=OLD_VARIANT,Number=.,Type=String,Description="Original chr:pos:ref:alt encoding">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
+20	421805	.	T	TCCA	.	PASS	VC=INDEL;AC=24;AF=0.08;AN=316;refseq.name=NM_144628;refseq.positionType=intron;OLD_VARIANT=20:421808:A/ACCA
+20	1292033	.	C	CTTGT	.	PASS	VC=INDEL;AC=28;AF=0.1;AN=276;refseq.name=NM_080489;refseq.positionType=intron
+20	1340527	.	T	TGTC	.	PASS	VC=INDEL;AC=56;AF=0.18;AN=316
+20	1600125	.	GAA	G	.	PASS	VC=INDEL;AC=3;AF=0.01;AN=312;refseq.name=NM_001083910;refseq.positionType=intron
+20	1728298	.	G	GT	.	PASS	VC=INDEL;AC=199;AF=0.63;AN=316
+20	2171402	.	T	TA	.	PASS	VC=INDEL;AC=223;AF=0.71;AN=316
+20	2171402	.	T	TA	.	PASS	VC=INDEL;AC=223;AF=0.71;AN=316;OLD_VARIANT=20:2171404:A/AA
+20	2982245	.	CT	C	.	PASS	VC=INDEL;AC=118;AF=0.38;AN=314;refseq.name=NM_002836;refseq.positionType=intron
+20	3025866	.	TCAAA	T	.	PASS	VC=INDEL;AC=31;AF=0.1;AN=306;refseq.name=NM_001501;refseq.positionType=intron
+20	3373437	.	GCTTT	G	.	PASS	VC=INDEL;AC=58;AF=0.18;AN=316;refseq.name=NM_001009984;refseq.positionType=intron;OLD_VARIANT=20:3373441:TCTTT/T
+20	3635158	.	A	AT	.	PASS	VC=INDEL;AC=130;AF=0.47;AN=274;OLD_VARIANT=20:3635159:T/TT
+20	4422115	.	GGGAGCTCCCAGGCTACAGAAAGATGAT	G	.	PASS	VC=INDEL;AC=174;AF=0.55;AN=314;OLD_VARIANT=20:4422119:GCTCCCAGGCTACAGAAAGATGATGGAG/G
+20	5151108	.	GTTCT	G	.	PASS	VC=INDEL;AC=61;AF=0.2;AN=304;refseq.name=NM_003818;refseq.positionType=intron
+20	5280839	.	T	TATA	.	PASS	VC=INDEL;AC=202;AF=0.75;AN=268
+20	5291223	.	TCAG	T	.	PASS	VC=INDEL;AC=51;AF=0.16;AN=316;refseq.name=NM_144773;refseq.positionType=intron
+20	5509358	.	T	TG	.	PASS	VC=INDEL;AC=136;AF=0.43;AN=316
+20	5900669	.	G	GC	.	PASS	VC=INDEL;AC=69;AF=0.23;AN=300;refseq.name=NM_001819;refseq.positionType=intron
+20	5900669	.	G	GC	.	PASS	VC=INDEL;AC=68;AF=0.22;AN=304;refseq.name=NM_001819;refseq.positionType=intron;OLD_VARIANT=20:5900670:C/CC
+20	6351757	.	C	CTT	.	PASS	VC=INDEL;AC=52;AF=0.17;AN=314
+20	6362163	.	GC	G	.	PASS	VC=INDEL;AC=49;AF=0.2;AN=250
+20	6481086	.	T	TTGTC	.	PASS	VC=INDEL;AC=307;AF=0.97;AN=316
+20	8080280	.	GTTTG	G	.	PASS	VC=INDEL;AC=118;AF=0.37;AN=316
+20	8781394	.	AA	A	.	PASS	VC=INDEL;AC=122;AF=0.39;AN=316;refseq.name=NM_015192;refseq.positionType=intron
+20	7391879	.	C	CT	.	PASS	VC=INDEL;AC=110;AF=0.35;AN=314;OLD_VARIANT=20:9035330:T/TT
+20	8833756	.	TT	T	.	PASS	VC=INDEL;AC=13;AF=0.04;AN=298;refseq.name=NM_015192;refseq.positionType=intron
+20	9311904	.	TGTATCTGTCCA	T	.	PASS	VC=INDEL;AC=46;AF=0.15;AN=314;refseq.name=NM_000933;refseq.positionType=intron
+20	9389232	.	GGGTTTGAT	G	.	PASS	VC=INDEL;AC=77;AF=0.25;AN=306;refseq.name=NM_000933;refseq.positionType=intron
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/fasta_indexes.loc.sample	Sat Jun 04 12:45:04 2016 -0400
@@ -0,0 +1,29 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools indexed sequences data files.  You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (white space characters are TAB characters):
+#
+# <unique_build_id>	<dbkey>	<display_name>	<file_base_path>
+#
+#So, for example, if you had hg19 Canonical indexed stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should include an entry per line for
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+#hg18canon	hg18	Human (Homo sapiens): hg18 Canonical	/depot/data2/galaxy/hg18/sam/hg18canon.fa
+#hg18full	hg18	Human (Homo sapiens): hg18 Full	/depot/data2/galaxy/hg18/sam/hg18full.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/depot/data2/galaxy/hg19/sam/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/depot/data2/galaxy/hg19/sam/hg19full.fa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Sat Jun 04 12:45:04 2016 -0400
@@ -0,0 +1,7 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <table name="fasta_indexes" comment_char="#"> <!-- data table backed by the fasta_indexes.loc file named below -->
+        <columns>value, dbkey, name, path</columns> <!-- same order as the tab-separated .loc fields: unique_build_id, dbkey, display_name, file_base_path -->
+        <file path="tool-data/fasta_indexes.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Sat Jun 04 12:45:04 2016 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency> <!-- Tool Shed dependency definition for the vt package -->
+    <package name="vt" version="5c735ab14b5603d9f14da6ee0e63d86ba3779934"> <!-- NOTE(review): vt_macros.xml declares the vt requirement with version="2015.11.10", which differs from this version string; confirm both resolve to the same package -->
+        <repository changeset_revision="93d469825e03" name="package_vt_5c735ab14b5603d9f14da6ee0e63d86ba3779934" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vt_decompose.xml	Sat Jun 04 12:45:04 2016 -0400
@@ -0,0 +1,133 @@
+<tool id="vt_decompose" name="VT @BINARY@" version="@VERSION@.0">
+    <description>decomposes multiallelic variants into biallelic ones</description>
+    <macros>
+        <import>vt_macros.xml</import>
+        <token name="@BINARY@">decompose</token>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command>
+<![CDATA[
+
+        ln -s "${ infile }" infile.vcf &&
+
+
+        vt @BINARY@
+            #if str($output_format) == 'bcf':
+                -o decompose.bcf
+            #else:
+                -o decompose.vcf
+            #end if
+            $s
+            infile.vcf
+
+        &&
+        ## For some reason, the file move will randomly produce empty files.
+        ## Wait two seconds to let the system close file handlers and clean up.
+        sleep 2
+        &&
+
+        #if str($output_format) == 'bcf':
+            mv decompose.bcf "${ outfile }";
+        #else:
+            mv decompose.vcf "${ outfile }";
+        #end if
+
+]]>
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="vcf" label="VCF file to be decomposed" />
+        <!-- Inserted into the command line as the literal flag "-s" when checked; expands to nothing otherwise. -->
+        <param argument="-s" type="boolean" truevalue="-s" falsevalue=""
+            checked="false" label="Smart decomposition"
+            help="Splits up INFO and GENOTYPE fields that have number counts of R and A appropriately."/>
+        <!-- Selects the extension of the temporary vt output file and the datatype of the Galaxy output. -->
+        <param name="output_format" type="select" label="Choose the output format" help="">
+            <option value="bcf">BCF</option>
+            <option value="vcf" selected="true">VCF</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="vcf" label="${tool.name} on ${on_string}"> <!-- datatype is vcf unless the rule below switches it -->
+            <change_format>
+                <when input="output_format" value="bcf" format="bcf" /> <!-- use bcf when the user picked BCF in output_format -->
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test> <!-- default options: -s left unset, VCF output -->
+            <param name="infile" value="infile01.vcf" />
+            <output name="outfile" file="decompose_result01.vcf" ftype="vcf" />
+        </test>
+        <test> <!-- smart decomposition: the -s boolean is enabled -->
+            <param name="infile" value="infile02.vcf" />
+            <param name="s" value="True" />
+            <output name="outfile" file="decompose_result02.vcf" ftype="vcf" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+
+**What it does**
+
+Decompose multiallelic variants in a VCF file.
+If the VCF file has genotype fields GT, PL, GL or DP, they are modified to reflect the change in alleles.
+All other genotype fields are removed. The -s option will retain the fields and decompose fields of counts R and A accordingly.
+
+Decomposition and combining variants is a complex operation where the correctness is dependent on:
+
+    * whether the observed variants are seen in the same sample
+    * if same sample, whether they are homozygous or heterozygous
+    * if both heterozygous, whether they are in the same haplotype or not (if known)
+
+and one should be aware of the issues in handling variants resulting from such operations.
+The original purpose of this tool is to allow for allelic comparisons between call sets. 
+
+Standard option:
+
+Before decomposition
+
+.. code::
+
+  #CHROM  POS     ID   REF     ALT         QUAL   FILTER  INFO                  FORMAT    S1                                     S2
+  1       3759889 .    TA      TAA,TAAA,T  .      PASS    AF=0.342,0.173,0.037	GT:DP:PL	  1/2:81:281,5,9,58,0,115,338,46,116,809	 0/0:86:0,30,323,31,365,483,38,291,325,567
+
+After decomposition
+
+.. code::
+
+  #CHROM  POS     ID   REF     ALT         QUAL   FILTER  INFO                                        FORMAT   S1               S2
+  1	  3759889 .    TA      TAA	   .	  PASS    OLD_MULTIALLELIC=1:3759889:TA/TAA/TAAA/T    GT:PL    1/.:281,5,9      0/0:0,30,323
+  1	  3759889 .    TA      TAAA        .      .       OLD_MULTIALLELIC=1:3759889:TA/TAA/TAAA/T    GT:PL    ./1:281,58,115   0/0:0,31,483
+  1	  3759889 .    TA      T           .      .       OLD_MULTIALLELIC=1:3759889:TA/TAA/TAAA/T    GT:PL    ./.:281,338,809  0/0:0,38,567
+
+
+One might want to post process the partial genotypes like 1/. to the best guess genotype based on the PL values.
+
+
+With **-s** option:
+
+Before decomposition
+
+.. code::
+
+  #CHROM  POS     ID   REF     ALT         QUAL   FILTER  INFO                  FORMAT    S1                                     S2
+  1       3759889 .    TA      TAA,TAAA,T  .      PASS    AF=0.342,0.173,0.037	GT:DP:PL	  1/2:81:281,5,9,58,0,115,338,46,116,809	 0/0:86:0,30,323,31,365,483,38,291,325,567
+
+After decomposition
+
+.. code::
+
+  #CHROM  POS     ID   REF     ALT         QUAL   FILTER  INFO                                                 FORMAT   S1               S2
+  1	  3759889 .    TA      TAA	   .	  PASS    AF=0.342;OLD_MULTIALLELIC=1:3759889:TA/TAA/TAAA/T    GT:PL    1/.:281,5,9      0/0:0,30,323
+  1	  3759889 .    TA      TAAA        .      .       AF=0.173;OLD_MULTIALLELIC=1:3759889:TA/TAA/TAAA/T    GT:PL    ./1:281,58,115   0/0:0,31,483
+  1	  3759889 .    TA      T           .      .       AF=0.037;OLD_MULTIALLELIC=1:3759889:TA/TAA/TAAA/T    GT:PL    ./.:281,338,809  0/0:0,38,567
+
+In general, you should recompute fields that involve alleles after decomposition.  Information is generally lost after vertically decomposing a variant, so care should be taken in interpreting the resultant values.
+
+@CITATION@
+]]>
+    </help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vt_macros.xml	Sat Jun 04 12:45:04 2016 -0400
@@ -0,0 +1,42 @@
+<macros>
+    <xml name="requirements"> <!-- shared requirements block; vt_decompose.xml pulls it in with an expand element -->
+        <requirements>
+            <requirement type="package" version="2015.11.10">vt</requirement>
+            <yield /> <!-- placeholder for additional requirement entries supplied by the expanding tool -->
+        </requirements>
+    </xml>
+
+    <xml name="version_command"> <!-- "2>&1 >/dev/null" routes stderr into the pipe and discards stdout, so head keeps only the first line written to stderr -->
+        <version_command><![CDATA[
+            vt @BINARY@ --help 2>&1 >/dev/null | head -n 1
+        ]]>
+        </version_command>
+    </xml>
+
+    <xml name="stdio"> <!-- shared error-detection rules; severity is not set here, so Galaxy's defaults for stdio rules apply -->
+        <stdio>
+            <exit_code range="1:" /> <!-- any exit code of 1 or above -->
+            <exit_code range=":-1" /> <!-- any negative exit code -->
+            <regex match="Error:" />
+            <regex match="Exception:" />
+        </stdio>
+    </xml>
+
+    <token name="@VERSION@">0.2</token>
+
+    <token name="@CITATION@">------
+
+**Citation**
+
+If you use VT in your research, please cite the following project site:
+
+https://github.com/atks/vt
+
+    </token>
+    <xml name="citations"> <!-- shared citation block; vt_decompose.xml pulls it in with an expand element -->
+        <citations>
+            <citation type="doi">10.1093/bioinformatics/btv112</citation>
+            <yield /> <!-- placeholder for additional citation entries supplied by the expanding tool -->
+        </citations>
+    </xml>
+</macros>