Mercurial > repos > bgruening > vt
changeset 0:26babe3a66f1 draft default tip
planemo upload for repository https://github.com/atks/vt commit d4f5de5f229f503deb66a708f864cf380c900ce0
author | bgruening |
---|---|
date | Sat, 04 Jun 2016 10:41:29 -0400 |
parents | |
children | |
files | readme.rst test-data/20.fa.bz2 test-data/decompose_result01.vcf test-data/decompose_result02.vcf test-data/infile01.vcf test-data/infile02.vcf test-data/normalize_result01.vcf tool-data/fasta_indexes.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml vt_decompose.xml vt_macros.xml vt_normalize.xml |
diffstat | 13 files changed, 947 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme.rst Sat Jun 04 10:41:29 2016 -0400 @@ -0,0 +1,52 @@ +========================================== +Galaxy wrapper for VT the Variant Tool Set +========================================== + +A tool set for short variant discovery in genetic sequence data. + +http://genome.sph.umich.edu/wiki/vt + +============ +Installation +============ + +It is recommended to install this wrapper via the `Galaxy Tool Shed`. + +.. _`Galaxy Tool Shed`: https://testtoolshed.g2.bx.psu.edu/view/iuc/vt + + +======= +History +======= +- 0.1: Initial public release + + +==================== +Detailed description +==================== + +View the original VT documentation: http://genome.sph.umich.edu/wiki/vt + + +=============================== +Wrapper Licence (MIT/BSD style) +=============================== + +Permission to use, copy, modify, and distribute this software and its +documentation with or without modifications and for any purpose and +without fee is hereby granted, provided that any copyright notices +appear in all copies and that both those copyright notices and this +permission notice appear in supporting documentation, and that the +names of the contributors or copyright holders not be used in +advertising or publicity pertaining to distribution of the software +without specific prior permission. + +THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL +WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT +OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +OR PERFORMANCE OF THIS SOFTWARE. +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/decompose_result01.vcf Sat Jun 04 10:41:29 2016 -0400 @@ -0,0 +1,124 @@ +##fileformat=VCFv4.0 +##FILTER=<ID=PASS,Description="All filters passed"> +##contig=<ID=1,length=249250621,assembly=b37> +##contig=<ID=2,length=243199373,assembly=b37> +##contig=<ID=3,length=198022430,assembly=b37> +##contig=<ID=4,length=191154276,assembly=b37> +##contig=<ID=5,length=180915260,assembly=b37> +##contig=<ID=6,length=171115067,assembly=b37> +##contig=<ID=7,length=159138663,assembly=b37> +##contig=<ID=8,length=146364022,assembly=b37> +##contig=<ID=9,length=141213431,assembly=b37> +##contig=<ID=10,length=135534747,assembly=b37> +##contig=<ID=11,length=135006516,assembly=b37> +##contig=<ID=12,length=133851895,assembly=b37> +##contig=<ID=13,length=115169878,assembly=b37> +##contig=<ID=14,length=107349540,assembly=b37> +##contig=<ID=15,length=102531392,assembly=b37> +##contig=<ID=16,length=90354753,assembly=b37> +##contig=<ID=17,length=81195210,assembly=b37> +##contig=<ID=18,length=78077248,assembly=b37> +##contig=<ID=19,length=59128983,assembly=b37> +##contig=<ID=20,length=63025520,assembly=b37> +##contig=<ID=21,length=48129895,assembly=b37> +##contig=<ID=22,length=51304566,assembly=b37> +##contig=<ID=X,length=155270560,assembly=b37> +##contig=<ID=Y,length=59373566,assembly=b37> +##contig=<ID=MT,length=16569,assembly=b37> +##contig=<ID=GL000207.1,length=4262,assembly=b37> +##contig=<ID=GL000226.1,length=15008,assembly=b37> +##contig=<ID=GL000229.1,length=19913,assembly=b37> +##contig=<ID=GL000231.1,length=27386,assembly=b37> +##contig=<ID=GL000210.1,length=27682,assembly=b37> +##contig=<ID=GL000239.1,length=33824,assembly=b37> +##contig=<ID=GL000235.1,length=34474,assembly=b37> +##contig=<ID=GL000201.1,length=36148,assembly=b37> +##contig=<ID=GL000247.1,length=36422,assembly=b37> +##contig=<ID=GL000245.1,length=36651,assembly=b37> +##contig=<ID=GL000197.1,length=37175,assembly=b37> 
+##contig=<ID=GL000203.1,length=37498,assembly=b37> +##contig=<ID=GL000246.1,length=38154,assembly=b37> +##contig=<ID=GL000249.1,length=38502,assembly=b37> +##contig=<ID=GL000196.1,length=38914,assembly=b37> +##contig=<ID=GL000248.1,length=39786,assembly=b37> +##contig=<ID=GL000244.1,length=39929,assembly=b37> +##contig=<ID=GL000238.1,length=39939,assembly=b37> +##contig=<ID=GL000202.1,length=40103,assembly=b37> +##contig=<ID=GL000234.1,length=40531,assembly=b37> +##contig=<ID=GL000232.1,length=40652,assembly=b37> +##contig=<ID=GL000206.1,length=41001,assembly=b37> +##contig=<ID=GL000240.1,length=41933,assembly=b37> +##contig=<ID=GL000236.1,length=41934,assembly=b37> +##contig=<ID=GL000241.1,length=42152,assembly=b37> +##contig=<ID=GL000243.1,length=43341,assembly=b37> +##contig=<ID=GL000242.1,length=43523,assembly=b37> +##contig=<ID=GL000230.1,length=43691,assembly=b37> +##contig=<ID=GL000237.1,length=45867,assembly=b37> +##contig=<ID=GL000233.1,length=45941,assembly=b37> +##contig=<ID=GL000204.1,length=81310,assembly=b37> +##contig=<ID=GL000198.1,length=90085,assembly=b37> +##contig=<ID=GL000208.1,length=92689,assembly=b37> +##contig=<ID=GL000191.1,length=106433,assembly=b37> +##contig=<ID=GL000227.1,length=128374,assembly=b37> +##contig=<ID=GL000228.1,length=129120,assembly=b37> +##contig=<ID=GL000214.1,length=137718,assembly=b37> +##contig=<ID=GL000221.1,length=155397,assembly=b37> +##contig=<ID=GL000209.1,length=159169,assembly=b37> +##contig=<ID=GL000218.1,length=161147,assembly=b37> +##contig=<ID=GL000220.1,length=161802,assembly=b37> +##contig=<ID=GL000213.1,length=164239,assembly=b37> +##contig=<ID=GL000211.1,length=166566,assembly=b37> +##contig=<ID=GL000199.1,length=169874,assembly=b37> +##contig=<ID=GL000217.1,length=172149,assembly=b37> +##contig=<ID=GL000216.1,length=172294,assembly=b37> +##contig=<ID=GL000215.1,length=172545,assembly=b37> +##contig=<ID=GL000205.1,length=174588,assembly=b37> +##contig=<ID=GL000219.1,length=179198,assembly=b37> 
+##contig=<ID=GL000224.1,length=179693,assembly=b37> +##contig=<ID=GL000223.1,length=180455,assembly=b37> +##contig=<ID=GL000195.1,length=182896,assembly=b37> +##contig=<ID=GL000212.1,length=186858,assembly=b37> +##contig=<ID=GL000222.1,length=186861,assembly=b37> +##contig=<ID=GL000200.1,length=187035,assembly=b37> +##contig=<ID=GL000193.1,length=189789,assembly=b37> +##contig=<ID=GL000194.1,length=191469,assembly=b37> +##contig=<ID=GL000225.1,length=211173,assembly=b37> +##contig=<ID=GL000192.1,length=547496,assembly=b37> +##FilterLiftedVariants="analysis_type=FilterLiftedVariants input_file=[] sample_metadata=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null reference_sequence=/local/sequence/reference/BWA_ref/hg19/hg19.fasta rodBind=[/local/scratch/xyliu/0.915136538286792.sorted.vcf] rodToIntervalTrackName=null BTI_merge_rule=UNION DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false enable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null quiet_output_mode=false debug_mode=false help=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub" +##INFO=<ID=VC,Number=1,Type=String,Description="Variation Class"> +##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed"> +##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed"> 
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership"> +##INFO=<ID=refseq.name,Number=1,Type=String,Description="RefSeq name"> +##INFO=<ID=refseq.positionType,Number=1,Type=String,Description="RefSeq genome type position"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##INFO=<ID=OLD_MULTIALLELIC,Number=1,Type=String,Description="Original chr:pos:ref:alt encoding"> +#CHROM POS ID REF ALT QUAL FILTER INFO +20 421808 . A ACCA . PASS VC=INDEL;AC=24;AF=0.08;AN=316;refseq.name=NM_144628;refseq.positionType=intron +20 1292033 . C CTTGT . PASS VC=INDEL;AC=28;AF=0.1;AN=276;refseq.name=NM_080489;refseq.positionType=intron +20 1340527 . T TGTC . PASS VC=INDEL;AC=56;AF=0.18;AN=316 +20 1600125 . GAA G . PASS VC=INDEL;AC=3;AF=0.01;AN=312;refseq.name=NM_001083910;refseq.positionType=intron +20 1728298 . G GT . PASS VC=INDEL;AC=199;AF=0.63;AN=316 +20 2171402 . T TA . PASS VC=INDEL;AC=223;AF=0.71;AN=316 +20 2171404 . A AA . PASS VC=INDEL;AC=223;AF=0.71;AN=316 +20 2982245 . CT C . PASS VC=INDEL;AC=118;AF=0.38;AN=314;refseq.name=NM_002836;refseq.positionType=intron +20 3025866 . TCAAA T . PASS VC=INDEL;AC=31;AF=0.1;AN=306;refseq.name=NM_001501;refseq.positionType=intron +20 3373441 . TCTTT T . PASS VC=INDEL;AC=58;AF=0.18;AN=316;refseq.name=NM_001009984;refseq.positionType=intron +20 3635159 . T TT . PASS VC=INDEL;AC=130;AF=0.47;AN=274 +20 4422119 . GCTCCCAGGCTACAGAAAGATGATGGAG G . PASS VC=INDEL;AC=174;AF=0.55;AN=314 +20 5151108 . GTTCT G . PASS VC=INDEL;AC=61;AF=0.2;AN=304;refseq.name=NM_003818;refseq.positionType=intron +20 5280839 . T TATA . PASS VC=INDEL;AC=202;AF=0.75;AN=268 +20 5291223 . TCAG T . PASS VC=INDEL;AC=51;AF=0.16;AN=316;refseq.name=NM_144773;refseq.positionType=intron +20 5509358 . T TG . PASS VC=INDEL;AC=136;AF=0.43;AN=316 +20 5900669 . G GC . PASS VC=INDEL;AC=69;AF=0.23;AN=300;refseq.name=NM_001819;refseq.positionType=intron +20 5900670 . 
C CC . PASS VC=INDEL;AC=68;AF=0.22;AN=304;refseq.name=NM_001819;refseq.positionType=intron +20 6351757 . C CTT . PASS VC=INDEL;AC=52;AF=0.17;AN=314 +20 6362163 . GC G . PASS VC=INDEL;AC=49;AF=0.2;AN=250 +20 6481086 . T TTGTC . PASS VC=INDEL;AC=307;AF=0.97;AN=316 +20 8080280 . GTTTG G . PASS VC=INDEL;AC=118;AF=0.37;AN=316 +20 8781394 . AA A . PASS VC=INDEL;AC=122;AF=0.39;AN=316;refseq.name=NM_015192;refseq.positionType=intron +20 8833756 . TT T . PASS VC=INDEL;AC=13;AF=0.04;AN=298;refseq.name=NM_015192;refseq.positionType=intron +20 9035330 . T TT . PASS VC=INDEL;AC=110;AF=0.35;AN=314 +20 9311904 . TGTATCTGTCCA T . PASS VC=INDEL;AC=46;AF=0.15;AN=314;refseq.name=NM_000933;refseq.positionType=intron +20 9389232 . GGGTTTGAT G . PASS VC=INDEL;AC=77;AF=0.25;AN=306;refseq.name=NM_000933;refseq.positionType=intron
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/decompose_result02.vcf Sat Jun 04 10:41:29 2016 -0400 @@ -0,0 +1,100 @@ +##fileformat=VCFv4.0 +##FILTER=<ID=PASS,Description="All filters passed"> +##contig=<ID=1,length=249250621,assembly=b37> +##contig=<ID=2,length=243199373,assembly=b37> +##contig=<ID=3,length=198022430,assembly=b37> +##contig=<ID=4,length=191154276,assembly=b37> +##contig=<ID=5,length=180915260,assembly=b37> +##contig=<ID=6,length=171115067,assembly=b37> +##contig=<ID=7,length=159138663,assembly=b37> +##contig=<ID=8,length=146364022,assembly=b37> +##contig=<ID=9,length=141213431,assembly=b37> +##contig=<ID=10,length=135534747,assembly=b37> +##contig=<ID=11,length=135006516,assembly=b37> +##contig=<ID=12,length=133851895,assembly=b37> +##contig=<ID=13,length=115169878,assembly=b37> +##contig=<ID=14,length=107349540,assembly=b37> +##contig=<ID=15,length=102531392,assembly=b37> +##contig=<ID=16,length=90354753,assembly=b37> +##contig=<ID=17,length=81195210,assembly=b37> +##contig=<ID=18,length=78077248,assembly=b37> +##contig=<ID=19,length=59128983,assembly=b37> +##contig=<ID=20,length=63025520,assembly=b37> +##contig=<ID=21,length=48129895,assembly=b37> +##contig=<ID=22,length=51304566,assembly=b37> +##contig=<ID=X,length=155270560,assembly=b37> +##contig=<ID=Y,length=59373566,assembly=b37> +##contig=<ID=MT,length=16569,assembly=b37> +##contig=<ID=GL000207.1,length=4262,assembly=b37> +##contig=<ID=GL000226.1,length=15008,assembly=b37> +##contig=<ID=GL000229.1,length=19913,assembly=b37> +##contig=<ID=GL000231.1,length=27386,assembly=b37> +##contig=<ID=GL000210.1,length=27682,assembly=b37> +##contig=<ID=GL000239.1,length=33824,assembly=b37> +##contig=<ID=GL000235.1,length=34474,assembly=b37> +##contig=<ID=GL000201.1,length=36148,assembly=b37> +##contig=<ID=GL000247.1,length=36422,assembly=b37> +##contig=<ID=GL000245.1,length=36651,assembly=b37> +##contig=<ID=GL000197.1,length=37175,assembly=b37> 
+##contig=<ID=GL000203.1,length=37498,assembly=b37> +##contig=<ID=GL000246.1,length=38154,assembly=b37> +##contig=<ID=GL000249.1,length=38502,assembly=b37> +##contig=<ID=GL000196.1,length=38914,assembly=b37> +##contig=<ID=GL000248.1,length=39786,assembly=b37> +##contig=<ID=GL000244.1,length=39929,assembly=b37> +##contig=<ID=GL000238.1,length=39939,assembly=b37> +##contig=<ID=GL000202.1,length=40103,assembly=b37> +##contig=<ID=GL000234.1,length=40531,assembly=b37> +##contig=<ID=GL000232.1,length=40652,assembly=b37> +##contig=<ID=GL000206.1,length=41001,assembly=b37> +##contig=<ID=GL000240.1,length=41933,assembly=b37> +##contig=<ID=GL000236.1,length=41934,assembly=b37> +##contig=<ID=GL000241.1,length=42152,assembly=b37> +##contig=<ID=GL000243.1,length=43341,assembly=b37> +##contig=<ID=GL000242.1,length=43523,assembly=b37> +##contig=<ID=GL000230.1,length=43691,assembly=b37> +##contig=<ID=GL000237.1,length=45867,assembly=b37> +##contig=<ID=GL000233.1,length=45941,assembly=b37> +##contig=<ID=GL000204.1,length=81310,assembly=b37> +##contig=<ID=GL000198.1,length=90085,assembly=b37> +##contig=<ID=GL000208.1,length=92689,assembly=b37> +##contig=<ID=GL000191.1,length=106433,assembly=b37> +##contig=<ID=GL000227.1,length=128374,assembly=b37> +##contig=<ID=GL000228.1,length=129120,assembly=b37> +##contig=<ID=GL000214.1,length=137718,assembly=b37> +##contig=<ID=GL000221.1,length=155397,assembly=b37> +##contig=<ID=GL000209.1,length=159169,assembly=b37> +##contig=<ID=GL000218.1,length=161147,assembly=b37> +##contig=<ID=GL000220.1,length=161802,assembly=b37> +##contig=<ID=GL000213.1,length=164239,assembly=b37> +##contig=<ID=GL000211.1,length=166566,assembly=b37> +##contig=<ID=GL000199.1,length=169874,assembly=b37> +##contig=<ID=GL000217.1,length=172149,assembly=b37> +##contig=<ID=GL000216.1,length=172294,assembly=b37> +##contig=<ID=GL000215.1,length=172545,assembly=b37> +##contig=<ID=GL000205.1,length=174588,assembly=b37> +##contig=<ID=GL000219.1,length=179198,assembly=b37> 
+##contig=<ID=GL000224.1,length=179693,assembly=b37> +##contig=<ID=GL000223.1,length=180455,assembly=b37> +##contig=<ID=GL000195.1,length=182896,assembly=b37> +##contig=<ID=GL000212.1,length=186858,assembly=b37> +##contig=<ID=GL000222.1,length=186861,assembly=b37> +##contig=<ID=GL000200.1,length=187035,assembly=b37> +##contig=<ID=GL000193.1,length=189789,assembly=b37> +##contig=<ID=GL000194.1,length=191469,assembly=b37> +##contig=<ID=GL000225.1,length=211173,assembly=b37> +##contig=<ID=GL000192.1,length=547496,assembly=b37> +##FilterLiftedVariants="analysis_type=FilterLiftedVariants input_file=[] sample_metadata=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null reference_sequence=/local/sequence/reference/BWA_ref/hg19/hg19.fasta rodBind=[/local/scratch/xyliu/0.915136538286792.sorted.vcf] rodToIntervalTrackName=null BTI_merge_rule=UNION DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false enable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null quiet_output_mode=false debug_mode=false help=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub" +##INFO=<ID=VC,Number=1,Type=String,Description="Variation Class"> +##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed"> +##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed"> 
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership"> +##INFO=<ID=refseq.name,Number=1,Type=String,Description="RefSeq name"> +##INFO=<ID=refseq.positionType,Number=1,Type=String,Description="RefSeq genome type position"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##INFO=<ID=OLD_MULTIALLELIC,Number=1,Type=String,Description="Original chr:pos:ref:alt encoding"> +#CHROM POS ID REF ALT QUAL FILTER INFO +1 3759889 . TA TAA . PASS AF=0.342,0.173,0.037;OLD_MULTIALLELIC=1:3759889:TA/TAA/TAAA/T +1 3759889 . TA TAAA . PASS AF=0.342,0.173,0.037;OLD_MULTIALLELIC=1:3759889:TA/TAA/TAAA/T +1 3759889 . TA T . PASS AF=0.342,0.173,0.037;OLD_MULTIALLELIC=1:3759889:TA/TAA/TAAA/T
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/infile01.vcf Sat Jun 04 10:41:29 2016 -0400 @@ -0,0 +1,123 @@ +##fileformat=VCFv4.0 +##FILTER=<ID=PASS,Description="All filters passed"> +##contig=<ID=1,length=249250621,assembly=b37> +##contig=<ID=2,length=243199373,assembly=b37> +##contig=<ID=3,length=198022430,assembly=b37> +##contig=<ID=4,length=191154276,assembly=b37> +##contig=<ID=5,length=180915260,assembly=b37> +##contig=<ID=6,length=171115067,assembly=b37> +##contig=<ID=7,length=159138663,assembly=b37> +##contig=<ID=8,length=146364022,assembly=b37> +##contig=<ID=9,length=141213431,assembly=b37> +##contig=<ID=10,length=135534747,assembly=b37> +##contig=<ID=11,length=135006516,assembly=b37> +##contig=<ID=12,length=133851895,assembly=b37> +##contig=<ID=13,length=115169878,assembly=b37> +##contig=<ID=14,length=107349540,assembly=b37> +##contig=<ID=15,length=102531392,assembly=b37> +##contig=<ID=16,length=90354753,assembly=b37> +##contig=<ID=17,length=81195210,assembly=b37> +##contig=<ID=18,length=78077248,assembly=b37> +##contig=<ID=19,length=59128983,assembly=b37> +##contig=<ID=20,length=63025520,assembly=b37> +##contig=<ID=21,length=48129895,assembly=b37> +##contig=<ID=22,length=51304566,assembly=b37> +##contig=<ID=X,length=155270560,assembly=b37> +##contig=<ID=Y,length=59373566,assembly=b37> +##contig=<ID=MT,length=16569,assembly=b37> +##contig=<ID=GL000207.1,length=4262,assembly=b37> +##contig=<ID=GL000226.1,length=15008,assembly=b37> +##contig=<ID=GL000229.1,length=19913,assembly=b37> +##contig=<ID=GL000231.1,length=27386,assembly=b37> +##contig=<ID=GL000210.1,length=27682,assembly=b37> +##contig=<ID=GL000239.1,length=33824,assembly=b37> +##contig=<ID=GL000235.1,length=34474,assembly=b37> +##contig=<ID=GL000201.1,length=36148,assembly=b37> +##contig=<ID=GL000247.1,length=36422,assembly=b37> +##contig=<ID=GL000245.1,length=36651,assembly=b37> +##contig=<ID=GL000197.1,length=37175,assembly=b37> +##contig=<ID=GL000203.1,length=37498,assembly=b37> 
+##contig=<ID=GL000246.1,length=38154,assembly=b37> +##contig=<ID=GL000249.1,length=38502,assembly=b37> +##contig=<ID=GL000196.1,length=38914,assembly=b37> +##contig=<ID=GL000248.1,length=39786,assembly=b37> +##contig=<ID=GL000244.1,length=39929,assembly=b37> +##contig=<ID=GL000238.1,length=39939,assembly=b37> +##contig=<ID=GL000202.1,length=40103,assembly=b37> +##contig=<ID=GL000234.1,length=40531,assembly=b37> +##contig=<ID=GL000232.1,length=40652,assembly=b37> +##contig=<ID=GL000206.1,length=41001,assembly=b37> +##contig=<ID=GL000240.1,length=41933,assembly=b37> +##contig=<ID=GL000236.1,length=41934,assembly=b37> +##contig=<ID=GL000241.1,length=42152,assembly=b37> +##contig=<ID=GL000243.1,length=43341,assembly=b37> +##contig=<ID=GL000242.1,length=43523,assembly=b37> +##contig=<ID=GL000230.1,length=43691,assembly=b37> +##contig=<ID=GL000237.1,length=45867,assembly=b37> +##contig=<ID=GL000233.1,length=45941,assembly=b37> +##contig=<ID=GL000204.1,length=81310,assembly=b37> +##contig=<ID=GL000198.1,length=90085,assembly=b37> +##contig=<ID=GL000208.1,length=92689,assembly=b37> +##contig=<ID=GL000191.1,length=106433,assembly=b37> +##contig=<ID=GL000227.1,length=128374,assembly=b37> +##contig=<ID=GL000228.1,length=129120,assembly=b37> +##contig=<ID=GL000214.1,length=137718,assembly=b37> +##contig=<ID=GL000221.1,length=155397,assembly=b37> +##contig=<ID=GL000209.1,length=159169,assembly=b37> +##contig=<ID=GL000218.1,length=161147,assembly=b37> +##contig=<ID=GL000220.1,length=161802,assembly=b37> +##contig=<ID=GL000213.1,length=164239,assembly=b37> +##contig=<ID=GL000211.1,length=166566,assembly=b37> +##contig=<ID=GL000199.1,length=169874,assembly=b37> +##contig=<ID=GL000217.1,length=172149,assembly=b37> +##contig=<ID=GL000216.1,length=172294,assembly=b37> +##contig=<ID=GL000215.1,length=172545,assembly=b37> +##contig=<ID=GL000205.1,length=174588,assembly=b37> +##contig=<ID=GL000219.1,length=179198,assembly=b37> +##contig=<ID=GL000224.1,length=179693,assembly=b37> 
+##contig=<ID=GL000223.1,length=180455,assembly=b37> +##contig=<ID=GL000195.1,length=182896,assembly=b37> +##contig=<ID=GL000212.1,length=186858,assembly=b37> +##contig=<ID=GL000222.1,length=186861,assembly=b37> +##contig=<ID=GL000200.1,length=187035,assembly=b37> +##contig=<ID=GL000193.1,length=189789,assembly=b37> +##contig=<ID=GL000194.1,length=191469,assembly=b37> +##contig=<ID=GL000225.1,length=211173,assembly=b37> +##contig=<ID=GL000192.1,length=547496,assembly=b37> +##FilterLiftedVariants="analysis_type=FilterLiftedVariants input_file=[] sample_metadata=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null reference_sequence=/local/sequence/reference/BWA_ref/hg19/hg19.fasta rodBind=[/local/scratch/xyliu/0.915136538286792.sorted.vcf] rodToIntervalTrackName=null BTI_merge_rule=UNION DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false enable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null quiet_output_mode=false debug_mode=false help=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub" +##INFO=<ID=VC,Number=1,Type=String,Description="Variation Class"> +##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed"> +##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in 
called genotypes"> +##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership"> +##INFO=<ID=refseq.name,Number=1,Type=String,Description="RefSeq name"> +##INFO=<ID=refseq.positionType,Number=1,Type=String,Description="RefSeq genome type position"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +#CHROM POS ID REF ALT QUAL FILTER INFO +20 421808 . A ACCA . PASS VC=INDEL;AC=24;AF=0.08;AN=316;refseq.name=NM_144628;refseq.positionType=intron +20 1292033 . C CTTGT . PASS VC=INDEL;AC=28;AF=0.1;AN=276;refseq.name=NM_080489;refseq.positionType=intron +20 1340527 . T TGTC . PASS VC=INDEL;AC=56;AF=0.18;AN=316 +20 1600125 . GAA G . PASS VC=INDEL;AC=3;AF=0.01;AN=312;refseq.name=NM_001083910;refseq.positionType=intron +20 1728298 . G GT . PASS VC=INDEL;AC=199;AF=0.63;AN=316 +20 2171402 . T TA . PASS VC=INDEL;AC=223;AF=0.71;AN=316 +20 2171404 . A AA . PASS VC=INDEL;AC=223;AF=0.71;AN=316 +20 2982245 . CT C . PASS VC=INDEL;AC=118;AF=0.38;AN=314;refseq.name=NM_002836;refseq.positionType=intron +20 3025866 . TCAAA T . PASS VC=INDEL;AC=31;AF=0.1;AN=306;refseq.name=NM_001501;refseq.positionType=intron +20 3373441 . TCTTT T . PASS VC=INDEL;AC=58;AF=0.18;AN=316;refseq.name=NM_001009984;refseq.positionType=intron +20 3635159 . T TT . PASS VC=INDEL;AC=130;AF=0.47;AN=274 +20 4422119 . GCTCCCAGGCTACAGAAAGATGATGGAG G . PASS VC=INDEL;AC=174;AF=0.55;AN=314 +20 5151108 . GTTCT G . PASS VC=INDEL;AC=61;AF=0.2;AN=304;refseq.name=NM_003818;refseq.positionType=intron +20 5280839 . T TATA . PASS VC=INDEL;AC=202;AF=0.75;AN=268 +20 5291223 . TCAG T . PASS VC=INDEL;AC=51;AF=0.16;AN=316;refseq.name=NM_144773;refseq.positionType=intron +20 5509358 . T TG . PASS VC=INDEL;AC=136;AF=0.43;AN=316 +20 5900669 . G GC . PASS VC=INDEL;AC=69;AF=0.23;AN=300;refseq.name=NM_001819;refseq.positionType=intron +20 5900670 . C CC . PASS VC=INDEL;AC=68;AF=0.22;AN=304;refseq.name=NM_001819;refseq.positionType=intron +20 6351757 . C CTT . PASS VC=INDEL;AC=52;AF=0.17;AN=314 +20 6362163 . GC G . 
PASS VC=INDEL;AC=49;AF=0.2;AN=250 +20 6481086 . T TTGTC . PASS VC=INDEL;AC=307;AF=0.97;AN=316 +20 8080280 . GTTTG G . PASS VC=INDEL;AC=118;AF=0.37;AN=316 +20 8781394 . AA A . PASS VC=INDEL;AC=122;AF=0.39;AN=316;refseq.name=NM_015192;refseq.positionType=intron +20 8833756 . TT T . PASS VC=INDEL;AC=13;AF=0.04;AN=298;refseq.name=NM_015192;refseq.positionType=intron +20 9035330 . T TT . PASS VC=INDEL;AC=110;AF=0.35;AN=314 +20 9311904 . TGTATCTGTCCA T . PASS VC=INDEL;AC=46;AF=0.15;AN=314;refseq.name=NM_000933;refseq.positionType=intron +20 9389232 . GGGTTTGAT G . PASS VC=INDEL;AC=77;AF=0.25;AN=306;refseq.name=NM_000933;refseq.positionType=intron
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/infile02.vcf Sat Jun 04 10:41:29 2016 -0400 @@ -0,0 +1,98 @@ +##fileformat=VCFv4.0 +##FILTER=<ID=PASS,Description="All filters passed"> +##contig=<ID=1,length=249250621,assembly=b37> +##contig=<ID=2,length=243199373,assembly=b37> +##contig=<ID=3,length=198022430,assembly=b37> +##contig=<ID=4,length=191154276,assembly=b37> +##contig=<ID=5,length=180915260,assembly=b37> +##contig=<ID=6,length=171115067,assembly=b37> +##contig=<ID=7,length=159138663,assembly=b37> +##contig=<ID=8,length=146364022,assembly=b37> +##contig=<ID=9,length=141213431,assembly=b37> +##contig=<ID=10,length=135534747,assembly=b37> +##contig=<ID=11,length=135006516,assembly=b37> +##contig=<ID=12,length=133851895,assembly=b37> +##contig=<ID=13,length=115169878,assembly=b37> +##contig=<ID=14,length=107349540,assembly=b37> +##contig=<ID=15,length=102531392,assembly=b37> +##contig=<ID=16,length=90354753,assembly=b37> +##contig=<ID=17,length=81195210,assembly=b37> +##contig=<ID=18,length=78077248,assembly=b37> +##contig=<ID=19,length=59128983,assembly=b37> +##contig=<ID=20,length=63025520,assembly=b37> +##contig=<ID=21,length=48129895,assembly=b37> +##contig=<ID=22,length=51304566,assembly=b37> +##contig=<ID=X,length=155270560,assembly=b37> +##contig=<ID=Y,length=59373566,assembly=b37> +##contig=<ID=MT,length=16569,assembly=b37> +##contig=<ID=GL000207.1,length=4262,assembly=b37> +##contig=<ID=GL000226.1,length=15008,assembly=b37> +##contig=<ID=GL000229.1,length=19913,assembly=b37> +##contig=<ID=GL000231.1,length=27386,assembly=b37> +##contig=<ID=GL000210.1,length=27682,assembly=b37> +##contig=<ID=GL000239.1,length=33824,assembly=b37> +##contig=<ID=GL000235.1,length=34474,assembly=b37> +##contig=<ID=GL000201.1,length=36148,assembly=b37> +##contig=<ID=GL000247.1,length=36422,assembly=b37> +##contig=<ID=GL000245.1,length=36651,assembly=b37> +##contig=<ID=GL000197.1,length=37175,assembly=b37> +##contig=<ID=GL000203.1,length=37498,assembly=b37> 
+##contig=<ID=GL000246.1,length=38154,assembly=b37> +##contig=<ID=GL000249.1,length=38502,assembly=b37> +##contig=<ID=GL000196.1,length=38914,assembly=b37> +##contig=<ID=GL000248.1,length=39786,assembly=b37> +##contig=<ID=GL000244.1,length=39929,assembly=b37> +##contig=<ID=GL000238.1,length=39939,assembly=b37> +##contig=<ID=GL000202.1,length=40103,assembly=b37> +##contig=<ID=GL000234.1,length=40531,assembly=b37> +##contig=<ID=GL000232.1,length=40652,assembly=b37> +##contig=<ID=GL000206.1,length=41001,assembly=b37> +##contig=<ID=GL000240.1,length=41933,assembly=b37> +##contig=<ID=GL000236.1,length=41934,assembly=b37> +##contig=<ID=GL000241.1,length=42152,assembly=b37> +##contig=<ID=GL000243.1,length=43341,assembly=b37> +##contig=<ID=GL000242.1,length=43523,assembly=b37> +##contig=<ID=GL000230.1,length=43691,assembly=b37> +##contig=<ID=GL000237.1,length=45867,assembly=b37> +##contig=<ID=GL000233.1,length=45941,assembly=b37> +##contig=<ID=GL000204.1,length=81310,assembly=b37> +##contig=<ID=GL000198.1,length=90085,assembly=b37> +##contig=<ID=GL000208.1,length=92689,assembly=b37> +##contig=<ID=GL000191.1,length=106433,assembly=b37> +##contig=<ID=GL000227.1,length=128374,assembly=b37> +##contig=<ID=GL000228.1,length=129120,assembly=b37> +##contig=<ID=GL000214.1,length=137718,assembly=b37> +##contig=<ID=GL000221.1,length=155397,assembly=b37> +##contig=<ID=GL000209.1,length=159169,assembly=b37> +##contig=<ID=GL000218.1,length=161147,assembly=b37> +##contig=<ID=GL000220.1,length=161802,assembly=b37> +##contig=<ID=GL000213.1,length=164239,assembly=b37> +##contig=<ID=GL000211.1,length=166566,assembly=b37> +##contig=<ID=GL000199.1,length=169874,assembly=b37> +##contig=<ID=GL000217.1,length=172149,assembly=b37> +##contig=<ID=GL000216.1,length=172294,assembly=b37> +##contig=<ID=GL000215.1,length=172545,assembly=b37> +##contig=<ID=GL000205.1,length=174588,assembly=b37> +##contig=<ID=GL000219.1,length=179198,assembly=b37> +##contig=<ID=GL000224.1,length=179693,assembly=b37> 
+##contig=<ID=GL000223.1,length=180455,assembly=b37> +##contig=<ID=GL000195.1,length=182896,assembly=b37> +##contig=<ID=GL000212.1,length=186858,assembly=b37> +##contig=<ID=GL000222.1,length=186861,assembly=b37> +##contig=<ID=GL000200.1,length=187035,assembly=b37> +##contig=<ID=GL000193.1,length=189789,assembly=b37> +##contig=<ID=GL000194.1,length=191469,assembly=b37> +##contig=<ID=GL000225.1,length=211173,assembly=b37> +##contig=<ID=GL000192.1,length=547496,assembly=b37> +##FilterLiftedVariants="analysis_type=FilterLiftedVariants input_file=[] sample_metadata=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null reference_sequence=/local/sequence/reference/BWA_ref/hg19/hg19.fasta rodBind=[/local/scratch/xyliu/0.915136538286792.sorted.vcf] rodToIntervalTrackName=null BTI_merge_rule=UNION DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false enable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null quiet_output_mode=false debug_mode=false help=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub" +##INFO=<ID=VC,Number=1,Type=String,Description="Variation Class"> +##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed"> +##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in 
called genotypes"> +##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership"> +##INFO=<ID=refseq.name,Number=1,Type=String,Description="RefSeq name"> +##INFO=<ID=refseq.positionType,Number=1,Type=String,Description="RefSeq genome type position"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##INFO=<ID=OLD_MULTIALLELIC,Number=1,Type=String,Description="Original chr:pos:ref:alt encoding"> +#CHROM POS ID REF ALT QUAL FILTER INFO +1 3759889 . TA TAA,TAAA,T . PASS AF=0.342,0.173,0.037 GT:DP:PL 1/2:81:281,5,9,58,0,115,338,46,116,809 0/0:86:0,30,323,31,365,483,38,291,325,567
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/normalize_result01.vcf Sat Jun 04 10:41:29 2016 -0400 @@ -0,0 +1,124 @@ +##fileformat=VCFv4.0 +##FILTER=<ID=PASS,Description="All filters passed"> +##contig=<ID=1,length=249250621,assembly=b37> +##contig=<ID=2,length=243199373,assembly=b37> +##contig=<ID=3,length=198022430,assembly=b37> +##contig=<ID=4,length=191154276,assembly=b37> +##contig=<ID=5,length=180915260,assembly=b37> +##contig=<ID=6,length=171115067,assembly=b37> +##contig=<ID=7,length=159138663,assembly=b37> +##contig=<ID=8,length=146364022,assembly=b37> +##contig=<ID=9,length=141213431,assembly=b37> +##contig=<ID=10,length=135534747,assembly=b37> +##contig=<ID=11,length=135006516,assembly=b37> +##contig=<ID=12,length=133851895,assembly=b37> +##contig=<ID=13,length=115169878,assembly=b37> +##contig=<ID=14,length=107349540,assembly=b37> +##contig=<ID=15,length=102531392,assembly=b37> +##contig=<ID=16,length=90354753,assembly=b37> +##contig=<ID=17,length=81195210,assembly=b37> +##contig=<ID=18,length=78077248,assembly=b37> +##contig=<ID=19,length=59128983,assembly=b37> +##contig=<ID=20,length=63025520,assembly=b37> +##contig=<ID=21,length=48129895,assembly=b37> +##contig=<ID=22,length=51304566,assembly=b37> +##contig=<ID=X,length=155270560,assembly=b37> +##contig=<ID=Y,length=59373566,assembly=b37> +##contig=<ID=MT,length=16569,assembly=b37> +##contig=<ID=GL000207.1,length=4262,assembly=b37> +##contig=<ID=GL000226.1,length=15008,assembly=b37> +##contig=<ID=GL000229.1,length=19913,assembly=b37> +##contig=<ID=GL000231.1,length=27386,assembly=b37> +##contig=<ID=GL000210.1,length=27682,assembly=b37> +##contig=<ID=GL000239.1,length=33824,assembly=b37> +##contig=<ID=GL000235.1,length=34474,assembly=b37> +##contig=<ID=GL000201.1,length=36148,assembly=b37> +##contig=<ID=GL000247.1,length=36422,assembly=b37> +##contig=<ID=GL000245.1,length=36651,assembly=b37> +##contig=<ID=GL000197.1,length=37175,assembly=b37> 
+##contig=<ID=GL000203.1,length=37498,assembly=b37> +##contig=<ID=GL000246.1,length=38154,assembly=b37> +##contig=<ID=GL000249.1,length=38502,assembly=b37> +##contig=<ID=GL000196.1,length=38914,assembly=b37> +##contig=<ID=GL000248.1,length=39786,assembly=b37> +##contig=<ID=GL000244.1,length=39929,assembly=b37> +##contig=<ID=GL000238.1,length=39939,assembly=b37> +##contig=<ID=GL000202.1,length=40103,assembly=b37> +##contig=<ID=GL000234.1,length=40531,assembly=b37> +##contig=<ID=GL000232.1,length=40652,assembly=b37> +##contig=<ID=GL000206.1,length=41001,assembly=b37> +##contig=<ID=GL000240.1,length=41933,assembly=b37> +##contig=<ID=GL000236.1,length=41934,assembly=b37> +##contig=<ID=GL000241.1,length=42152,assembly=b37> +##contig=<ID=GL000243.1,length=43341,assembly=b37> +##contig=<ID=GL000242.1,length=43523,assembly=b37> +##contig=<ID=GL000230.1,length=43691,assembly=b37> +##contig=<ID=GL000237.1,length=45867,assembly=b37> +##contig=<ID=GL000233.1,length=45941,assembly=b37> +##contig=<ID=GL000204.1,length=81310,assembly=b37> +##contig=<ID=GL000198.1,length=90085,assembly=b37> +##contig=<ID=GL000208.1,length=92689,assembly=b37> +##contig=<ID=GL000191.1,length=106433,assembly=b37> +##contig=<ID=GL000227.1,length=128374,assembly=b37> +##contig=<ID=GL000228.1,length=129120,assembly=b37> +##contig=<ID=GL000214.1,length=137718,assembly=b37> +##contig=<ID=GL000221.1,length=155397,assembly=b37> +##contig=<ID=GL000209.1,length=159169,assembly=b37> +##contig=<ID=GL000218.1,length=161147,assembly=b37> +##contig=<ID=GL000220.1,length=161802,assembly=b37> +##contig=<ID=GL000213.1,length=164239,assembly=b37> +##contig=<ID=GL000211.1,length=166566,assembly=b37> +##contig=<ID=GL000199.1,length=169874,assembly=b37> +##contig=<ID=GL000217.1,length=172149,assembly=b37> +##contig=<ID=GL000216.1,length=172294,assembly=b37> +##contig=<ID=GL000215.1,length=172545,assembly=b37> +##contig=<ID=GL000205.1,length=174588,assembly=b37> +##contig=<ID=GL000219.1,length=179198,assembly=b37> 
+##contig=<ID=GL000224.1,length=179693,assembly=b37> +##contig=<ID=GL000223.1,length=180455,assembly=b37> +##contig=<ID=GL000195.1,length=182896,assembly=b37> +##contig=<ID=GL000212.1,length=186858,assembly=b37> +##contig=<ID=GL000222.1,length=186861,assembly=b37> +##contig=<ID=GL000200.1,length=187035,assembly=b37> +##contig=<ID=GL000193.1,length=189789,assembly=b37> +##contig=<ID=GL000194.1,length=191469,assembly=b37> +##contig=<ID=GL000225.1,length=211173,assembly=b37> +##contig=<ID=GL000192.1,length=547496,assembly=b37> +##FilterLiftedVariants="analysis_type=FilterLiftedVariants input_file=[] sample_metadata=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null reference_sequence=/local/sequence/reference/BWA_ref/hg19/hg19.fasta rodBind=[/local/scratch/xyliu/0.915136538286792.sorted.vcf] rodToIntervalTrackName=null BTI_merge_rule=UNION DBSNP=null downsampling_type=null downsample_to_fraction=null downsample_to_coverage=null baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 interval_merging=ALL read_group_black_list=null processingTracker=null restartProcessingTracker=false processingTrackerStatusFile=null processingTrackerID=-1 allow_intervals_with_unindexed_bam=false enable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null quiet_output_mode=false debug_mode=false help=false out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub" +##INFO=<ID=VC,Number=1,Type=String,Description="Variation Class"> +##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed"> +##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed"> 
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership"> +##INFO=<ID=refseq.name,Number=1,Type=String,Description="RefSeq name"> +##INFO=<ID=refseq.positionType,Number=1,Type=String,Description="RefSeq genome type position"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##INFO=<ID=OLD_VARIANT,Number=.,Type=String,Description="Original chr:pos:ref:alt encoding"> +#CHROM POS ID REF ALT QUAL FILTER INFO +20 421805 . T TCCA . PASS VC=INDEL;AC=24;AF=0.08;AN=316;refseq.name=NM_144628;refseq.positionType=intron;OLD_VARIANT=20:421808:A/ACCA +20 1292033 . C CTTGT . PASS VC=INDEL;AC=28;AF=0.1;AN=276;refseq.name=NM_080489;refseq.positionType=intron +20 1340527 . T TGTC . PASS VC=INDEL;AC=56;AF=0.18;AN=316 +20 1600125 . GAA G . PASS VC=INDEL;AC=3;AF=0.01;AN=312;refseq.name=NM_001083910;refseq.positionType=intron +20 1728298 . G GT . PASS VC=INDEL;AC=199;AF=0.63;AN=316 +20 2171402 . T TA . PASS VC=INDEL;AC=223;AF=0.71;AN=316 +20 2171402 . T TA . PASS VC=INDEL;AC=223;AF=0.71;AN=316;OLD_VARIANT=20:2171404:A/AA +20 2982245 . CT C . PASS VC=INDEL;AC=118;AF=0.38;AN=314;refseq.name=NM_002836;refseq.positionType=intron +20 3025866 . TCAAA T . PASS VC=INDEL;AC=31;AF=0.1;AN=306;refseq.name=NM_001501;refseq.positionType=intron +20 3373437 . GCTTT G . PASS VC=INDEL;AC=58;AF=0.18;AN=316;refseq.name=NM_001009984;refseq.positionType=intron;OLD_VARIANT=20:3373441:TCTTT/T +20 3635158 . A AT . PASS VC=INDEL;AC=130;AF=0.47;AN=274;OLD_VARIANT=20:3635159:T/TT +20 4422115 . GGGAGCTCCCAGGCTACAGAAAGATGAT G . PASS VC=INDEL;AC=174;AF=0.55;AN=314;OLD_VARIANT=20:4422119:GCTCCCAGGCTACAGAAAGATGATGGAG/G +20 5151108 . GTTCT G . PASS VC=INDEL;AC=61;AF=0.2;AN=304;refseq.name=NM_003818;refseq.positionType=intron +20 5280839 . T TATA . PASS VC=INDEL;AC=202;AF=0.75;AN=268 +20 5291223 . TCAG T . PASS VC=INDEL;AC=51;AF=0.16;AN=316;refseq.name=NM_144773;refseq.positionType=intron +20 5509358 . 
T TG . PASS VC=INDEL;AC=136;AF=0.43;AN=316 +20 5900669 . G GC . PASS VC=INDEL;AC=69;AF=0.23;AN=300;refseq.name=NM_001819;refseq.positionType=intron +20 5900669 . G GC . PASS VC=INDEL;AC=68;AF=0.22;AN=304;refseq.name=NM_001819;refseq.positionType=intron;OLD_VARIANT=20:5900670:C/CC +20 6351757 . C CTT . PASS VC=INDEL;AC=52;AF=0.17;AN=314 +20 6362163 . GC G . PASS VC=INDEL;AC=49;AF=0.2;AN=250 +20 6481086 . T TTGTC . PASS VC=INDEL;AC=307;AF=0.97;AN=316 +20 8080280 . GTTTG G . PASS VC=INDEL;AC=118;AF=0.37;AN=316 +20 8781394 . AA A . PASS VC=INDEL;AC=122;AF=0.39;AN=316;refseq.name=NM_015192;refseq.positionType=intron +20 7391879 . C CT . PASS VC=INDEL;AC=110;AF=0.35;AN=314;OLD_VARIANT=20:9035330:T/TT +20 8833756 . TT T . PASS VC=INDEL;AC=13;AF=0.04;AN=298;refseq.name=NM_015192;refseq.positionType=intron +20 9311904 . TGTATCTGTCCA T . PASS VC=INDEL;AC=46;AF=0.15;AN=314;refseq.name=NM_000933;refseq.positionType=intron +20 9389232 . GGGTTTGAT G . PASS VC=INDEL;AC=77;AF=0.25;AN=306;refseq.name=NM_000933;refseq.positionType=intron
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/fasta_indexes.loc.sample Sat Jun 04 10:41:29 2016 -0400 @@ -0,0 +1,29 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools indexed sequences data files. You will need +#to create these data files and then create a fasta_indexes.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The fasta_indexes.loc +#file has this format (white space characters are TAB characters): +# +# <unique_build_id> <dbkey> <display_name> <file_base_path> +# +#So, for example, if you had hg19 Canonical indexed stored in +# +# /depot/data2/galaxy/hg19/sam/, +# +#then the fasta_indexes.loc entry would look like this: +# +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +# +#and your /depot/data2/galaxy/hg19/sam/ directory +#would contain hg19canon.fa and hg19canon.fa.fai files. +# +#Your fasta_indexes.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa +#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Sat Jun 04 10:41:29 2016 -0400
@@ -0,0 +1,7 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <table name="fasta_indexes" comment_char="#"> <!-- data table that vt_normalize.xml reads via from_data_table="fasta_indexes" -->
+        <columns>value, dbkey, name, path</columns> <!-- order of the TAB-separated fields on each line of the .loc file -->
+        <file path="tool-data/fasta_indexes.loc" /> <!-- lines starting with the comment_char above are ignored -->
+    </table>
+</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Sat Jun 04 10:41:29 2016 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="vt" version="5c735ab14b5603d9f14da6ee0e63d86ba3779934"> <!-- NOTE(review): vt_macros.xml requests vt version "2015.11.10"; confirm which version string is intended -->
+        <repository changeset_revision="93d469825e03" name="package_vt_5c735ab14b5603d9f14da6ee0e63d86ba3779934" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> <!-- Tool Shed repository that provides the package -->
+    </package>
+</tool_dependency>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/vt_decompose.xml Sat Jun 04 10:41:29 2016 -0400
@@ -0,0 +1,133 @@
+<tool id="vt_@BINARY@" name="VT @BINARY@" version="@VERSION@.0">
+    <description>decomposes multiallelic variants into biallelic ones</description>
+    <macros>
+        <import>vt_macros.xml</import>
+        <token name="@BINARY@">decompose</token> <!-- vt sub-command; also substituted into the tool id and name -->
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command>
+<![CDATA[
+        ## Link the input dataset under a fixed name with a .vcf extension in the working directory.
+        ln -s "${ infile }" infile.vcf &&
+
+        ## The name (and extension) of the file vt writes follows the selected output format.
+        vt @BINARY@
+            #if str($output_format) == 'bcf':
+                -o decompose.bcf
+            #else:
+                -o decompose.vcf
+            #end if
+            $s
+            infile.vcf
+
+        &&
+        ## For some reason, the file move will randomly produce empty files.
+        ## Wait two seconds to let the system close file handlers and clean up.
+        sleep 2
+        &&
+        ## Move the result onto the Galaxy output dataset.
+        #if str($output_format) == 'bcf':
+            mv decompose.bcf "${ outfile }";
+        #else:
+            mv decompose.vcf "${ outfile }";
+        #end if
+
+]]>
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="vcf" label="VCF file to be decomposed" />
+        <!-- Optional flag: contributes "-s" to the command line when checked, nothing otherwise. -->
+        <param argument="-s" type="boolean" truevalue="-s" falsevalue=""
+            checked="false" label="Smart decomposition"
+            help="Splits up INFO and GENOTYPE fields that have number counts of R and A appropriately."/>
+        <!-- Drives both the extension of the file vt writes and the datatype of the output dataset. -->
+        <param name="output_format" type="select" label="Choose the output format" help="">
+            <option value="bcf">BCF</option>
+            <option value="vcf" selected="true">VCF</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="vcf" label="${tool.name} on ${on_string}">
+            <change_format>
+                <when input="output_format" value="bcf" format="bcf" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="infile01.vcf" />
+            <output name="outfile" file="decompose_result01.vcf" ftype="vcf" />
+        </test>
+        <test>
+            <param name="infile" value="infile02.vcf" />
+            <param name="s" value="True" />
+            <output name="outfile" file="decompose_result02.vcf" ftype="vcf" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+
+**What it does**
+
+Decompose multiallelic variants in a VCF file.
+If the VCF file has genotype fields GT, PL, GL or DP, they are modified to reflect the change in alleles.
+All other genotype fields are removed. The -s option will retain the fields and decompose fields of counts R and A accordingly.
+
+Decomposition and combining variants is a complex operation where the correctness is dependent on:
+
+  * whether the observed variants are seen in the same sample
+  * if same sample, whether they are homozygous or heterozygous
+  * if both heterozygous, whether they are in the same haplotype or not (if known)
+
+and one should be aware of the issues in handling variants resulting from such operations.
+The original purpose of this tool is to allow for allelic comparisons between call sets.
+
+Standard option:
+
+Before decomposition
+
+.. code::
+
+    #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2
+    1 3759889 . TA TAA,TAAA,T . PASS AF=0.342,0.173,0.037 GT:DP:PL 1/2:81:281,5,9,58,0,115,338,46,116,809 0/0:86:0,30,323,31,365,483,38,291,325,567
+
+After decomposition
+
+.. code::
+
+    #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2
+    1 3759889 . TA TAA . PASS OLD_MULTIALLELIC=1:3759889:TA/TAA/TAAA/T GT:PL 1/.:281,5,9 0/0:0,30,323
+    1 3759889 . TA TAAA . . OLD_MULTIALLELIC=1:3759889:TA/TAA/TAAA/T GT:PL ./1:281,58,115 0/0:0,31,483
+    1 3759889 . TA T . . OLD_MULTIALLELIC=1:3759889:TA/TAA/TAAA/T GT:PL ./.:281,338,809 0/0:0,38,567
+
+
+One might want to post process the partial genotypes like 1/. to the best guess genotype based on the PL values.
+
+
+With **-s** option:
+
+Before decomposition
+
+.. code::
+
+    #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2
+    1 3759889 . TA TAA,TAAA,T . PASS AF=0.342,0.173,0.037 GT:DP:PL 1/2:81:281,5,9,58,0,115,338,46,116,809 0/0:86:0,30,323,31,365,483,38,291,325,567
+
+After decomposition
+
+.. code::
+
+    #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2
+    1 3759889 . TA TAA . PASS AF=0.342;OLD_MULTIALLELIC=1:3759889:TA/TAA/TAAA/T GT:PL 1/.:281,5,9 0/0:0,30,323
+    1 3759889 . TA TAAA . . AF=0.173;OLD_MULTIALLELIC=1:3759889:TA/TAA/TAAA/T GT:PL ./1:281,58,115 0/0:0,31,483
+    1 3759889 . TA T . . AF=0.037;OLD_MULTIALLELIC=1:3759889:TA/TAA/TAAA/T GT:PL ./.:281,338,809 0/0:0,38,567
+
+In general, you should recompute fields that involve alleles after decomposition. Information is generally lost after vertically decomposing a variant, so care should be taken in interpreting the resultant values.
+
+@CITATION@
+]]>
+    </help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/vt_macros.xml Sat Jun 04 10:41:29 2016 -0400
@@ -0,0 +1,42 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="2015.11.10">vt</requirement> <!-- NOTE(review): tool_dependencies.xml declares package vt with version "5c735ab14b5603d9f14da6ee0e63d86ba3779934"; confirm the two version strings are meant to differ -->
+            <yield />
+        </requirements>
+    </xml>
+    <!-- Version probe: keeps only the first line that the vt help call prints on stderr; stdout is discarded. -->
+    <xml name="version_command">
+        <version_command><![CDATA[
+            vt @BINARY@ --help 2>&1 >/dev/null | head -n 1
+        ]]>
+        </version_command>
+    </xml>
+    <!-- Job output checks: exit codes of 1 and above or -1 and below, and the patterns "Error:" and "Exception:". -->
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" />
+            <exit_code range=":-1" />
+            <regex match="Error:" />
+            <regex match="Exception:" />
+        </stdio>
+    </xml>
+    <!-- Wrapper version; vt_decompose.xml and vt_normalize.xml append ".0" to form the tool version. -->
+    <token name="@VERSION@">0.2</token>
+    <!-- Citation footer; both tools place @CITATION@ at the end of their help text. -->
+    <token name="@CITATION@">------
+
+**Citation**
+
+If you use VT in your research, please cite the following project site:
+
+https://github.com/atks/vt
+
+    </token>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1093/bioinformatics/btv112</citation>
+            <yield />
+        </citations>
+    </xml>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/vt_normalize.xml Sat Jun 04 10:41:29 2016 -0400
@@ -0,0 +1,109 @@
+<tool id="vt_@BINARY@" name="VT @BINARY@" version="@VERSION@.0">
+    <description>normalizes variants in a VCF file</description>
+    <macros>
+        <import>vt_macros.xml</import>
+        <token name="@BINARY@">normalize</token> <!-- vt sub-command; also substituted into the tool id and name -->
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command>
+<![CDATA[
+        ## Link a history reference into the working directory; presumably vt writes its FASTA index next to the reference (confirm).
+        #if $reference_source.reference_source_selector == "history":
+            ln -s "${reference_source.reference_genome}" reference.fa &&
+        #end if
+        vt @BINARY@
+            #if str($output_format) == 'bcf':
+                -o normalised.bcf
+            #else:
+                -o normalised.vcf
+            #end if
+            ##-q do not print options and summary []
+            -w $window
+            #if str( $intervals_file ) != 'None':
+                -I "${intervals_file}"
+            #end if
+            $n
+            ##-i intervals []
+            #if $reference_source.reference_source_selector != "history":
+                -r "${reference_source.reference_genome.fields.path}"
+            #else:
+                -r reference.fa
+            #end if
+            "${ infile }"
+
+        &&
+        ## For some reason, the file move will randomly produce empty files.
+        ## Wait two seconds to let the system close file handlers and clean up.
+        sleep 2
+        &&
+        #if str($output_format) == 'bcf':
+            mv normalised.bcf "${ outfile }";
+        #else:
+            mv normalised.vcf "${ outfile }";
+        #end if
+
+]]>
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="vcf" label="VCF file to be normalised" />
+        <!-- The reference comes either from the fasta_indexes data table (filtered by the dbkey of the input) or from a FASTA dataset in the history. -->
+        <conditional name="reference_source">
+            <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
+                <option value="cached" selected="True">Locally cached</option>
+                <option value="history">History</option>
+            </param>
+            <when value="cached">
+                <param name="reference_genome" type="select" label="Using reference genome">
+                    <options from_data_table="fasta_indexes">
+                        <filter type="data_meta" ref="infile" key="dbkey" column="1" />
+                        <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="reference_genome" type="data" format="fasta" label="Using reference file" />
+            </when>
+        </conditional>
+        <param name="intervals_file" type="data" format="bed" optional="True" label="File containing list of intervals" />
+        <param name="window" type="integer" value="10000" label="Window size for local sorting of variants"
+            help="(-w)">
+            <validator type="in_range" min="0"/>
+        </param>
+        <param argument="-n" type="boolean" truevalue="-n" falsevalue=""
+            label="Do not fail when REF is inconsistent with reference sequence for non SNPs"
+            help=""/>
+        <param name="output_format" type="select" label="Choose the output format" help="">
+            <option value="bcf">BCF</option>
+            <option value="vcf" selected="true">VCF</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="vcf" label="${tool.name} on ${on_string}">
+            <change_format>
+                <when input="output_format" value="bcf" format="bcf" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="infile01.vcf" />
+            <param name="reference_source_selector" value="history" />
+            <param name="reference_genome" value="20.fa.bz2" />
+            <param name="n" value="True" />
+            <output name="outfile" file="normalize_result01.vcf" ftype="vcf" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**What it does**
+
+Normalize variants in a VCF file. Normalized variants may have their positions changed; in such cases,
+the normalized variants are reordered and output in an ordered fashion. The local reordering takes place over a window of 10000 base pairs by default; the size can be changed with the window size option (-w).
+
+@CITATION@
+]]>
+    </help>
+    <expand macro="citations"/>
+</tool>