changeset 0:77ae7fbde89a draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/vcflib/vcfdistance commit 5a4e0ca9992af3a6e5ed2b533f04bb82ce761e0b
author devteam
date Mon, 09 Nov 2015 12:31:21 -0500
parents
children 815622f17a3d
files macros.xml test-data/vcfdistance-test1.vcf test-data/vcflib.vcf tool_dependencies.xml vcfdistance.xml
diffstat 5 files changed, 128 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Mon Nov 09 12:31:21 2015 -0500
@@ -0,0 +1,28 @@
+<macros> <!-- Shared Galaxy macro definitions; vcfdistance.xml imports this file. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="8a5602bf07">vcflib</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" level="fatal" /> <!-- exit codes of 1 or higher are reported as fatal -->
+        </stdio>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+@misc{Garrison2015,
+  author = {Garrison, Erik},
+  year = {2015},
+  title = {vcflib},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  url = {https://github.com/ekg/vcflib},
+}
+            </citation>
+        </citations>
+    </xml>
+    <token name="@IS_PART_OF_VCFLIB@">is a part of VCFlib toolkit developed by Erik Garrison (https://github.com/ekg/vcflib).</token>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/vcfdistance-test1.vcf	Mon Nov 09 12:31:21 2015 -0500
@@ -0,0 +1,29 @@
+##fileformat=VCFv4.0
+##fileDate=20090805
+##source=myImputationProgramV3.1
+##reference=1000GenomesPilot-NCBI36
+##phasing=partial
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
+##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=s50,Description="Less than 50% of samples have data">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
+##ALT=<ID=DEL:ME:ALU,Description="Deletion of ALU element">
+##ALT=<ID=CNV,Description="Copy number variable region">
+##INFO=<ID=BasesToClosestVariant,Number=1,Type=Integer,Description="Number of bases to the closest variant in the file.">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
+19	111	.	A	C	9.6	.	BasesToClosestVariant=1	GT:HQ	0|0:10,10	0|0:10,10	0/1:3,3
+19	112	.	A	G	10	.	BasesToClosestVariant=1	GT:HQ	0|0:10,10	0|0:10,10	0/1:3,3
+20	17330	.	T	A	3	q10	AF=0.017;BasesToClosestVariant=1093366;DP=11;NS=3	GT:GQ:DP:HQ	0|0:49:3:58,50	0|1:3:5:65,3	0/0:41:3:.,.
+20	1110696	rs6040355	A	G,T	67	PASS	AA=T;AF=0.333,0.667;BasesToClosestVariant=119541;DP=10;NS=2;DB	GT:GQ:DP:HQ	1|2:21:6:23,27	2|1:2:0:18,2	2/2:35:4:.,.
+20	1230237	.	T	.	47	PASS	AA=T;BasesToClosestVariant=4330;DP=13;NS=3	GT:GQ:DP:HQ	0|0:54:.:56,60	0|0:48:4:51,51	0/0:61:2:.,.
+20	1234567	microsat1	G	GA,GAC	50	PASS	AA=G;AC=3,1;AN=6;BasesToClosestVariant=670;DP=9;NS=3	GT:GQ:DP	0/1:.:4	0/2:17:2	1/1:40:3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/vcflib.vcf	Mon Nov 09 12:31:21 2015 -0500
@@ -0,0 +1,31 @@
+##fileformat=VCFv4.0
+##fileDate=20090805
+##source=myImputationProgramV3.1
+##reference=1000GenomesPilot-NCBI36
+##phasing=partial
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
+##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=s50,Description="Less than 50% of samples have data">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
+##ALT=<ID=DEL:ME:ALU,Description="Deletion of ALU element">
+##ALT=<ID=CNV,Description="Copy number variable region">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
+19	111	.	A	C	9.6	.	.	GT:HQ	0|0:10,10	0|0:10,10	0/1:3,3
+19	112	.	A	G	10	.	.	GT:HQ	0|0:10,10	0|0:10,10	0/1:3,3
+20	14370	rs6054257	G	A	29	PASS	NS=3;DP=14;AF=0.5;DB;H2	GT:GQ:DP:HQ	0|0:48:1:51,51	1|0:48:8:51,51	1/1:43:5:.,.
+20	17330	.	T	A	3	q10	NS=3;DP=11;AF=0.017	GT:GQ:DP:HQ	0|0:49:3:58,50	0|1:3:5:65,3	0/0:41:3:.,.
+20	1110696	rs6040355	A	G,T	67	PASS	NS=2;DP=10;AF=0.333,0.667;AA=T;DB	GT:GQ:DP:HQ	1|2:21:6:23,27	2|1:2:0:18,2	2/2:35:4:.,.
+20	1230237	.	T	.	47	PASS	NS=3;DP=13;AA=T	GT:GQ:DP:HQ	0|0:54:.:56,60	0|0:48:4:51,51	0/0:61:2:.,.
+20	1234567	microsat1	G	GA,GAC	50	PASS	NS=3;DP=9;AA=G;AN=6;AC=3,1	GT:GQ:DP	0/1:.:4	0/2:17:2	1/1:40:3
+20	1235237	.	T	.	.	.	.	GT	0/0	0|0	./.
+X	10	rsTest	AC	A,ATG	10	PASS	.	GT	0	0/1	0|2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Mon Nov 09 12:31:21 2015 -0500
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency> <!-- Declares the repository that provides the vcflib 8a5602bf07 package. -->
+    <package version="8a5602bf07" name="vcflib">
+        <repository toolshed="https://testtoolshed.g2.bx.psu.edu" owner="iuc" name="package_vcflib_8a5602bf07" changeset_revision="3ac0905f7b7c"/>
+    </package>
+</tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vcfdistance.xml	Mon Nov 09 12:31:21 2015 -0500
@@ -0,0 +1,34 @@
+<tool id="vcfdistance" name="VCFdistance:" version="0.0.3"> <!-- Galaxy wrapper that runs the vcfdistance command on one VCF dataset. -->
+  <description>Calculate distance to the nearest variant</description>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="requirements" />
+  <expand macro="stdio" />
+  <command><![CDATA[vcfdistance < '${input1}' > '${out_file1}']]></command> <!-- stdin redirect instead of "cat |": an unreadable input now yields a non-zero exit code -->
+  <inputs>
+    <param format="vcf" name="input1" type="data" label="Select VCF dataset"/>
+  </inputs>
+  <outputs>
+    <data format="vcf" name="out_file1" />
+  </outputs>
+  <tests>
+    <test>
+      <param name="input1" value="vcflib.vcf"/>
+      <output name="out_file1" file="vcfdistance-test1.vcf"/>
+    </test>
+  </tests>
+  <help>
+
+Adds a value to each VCF record indicating the distance to the nearest variant in the file.
+
+.. class:: infomark
+
+The dataset used as input to this tool must be coordinate sorted. This can be achieved by either using the VCFsort utility or Galaxy's general purpose sort tool (in this case sort on the first and the second column in ascending order).
+
+----
+
+Vcfdistance @IS_PART_OF_VCFLIB@
+</help>
+  <expand macro="citations" />
+</tool>