comparison snpSift_caseControl.xml @ 0:1ae5526db990

Uploaded
author jjohnson
date Thu, 26 Jun 2014 09:22:13 -0400
parents
children 796388c291d3
comparison
equal deleted inserted replaced
-1:000000000000 0:1ae5526db990
1 <tool id="snpSift_caseControl" name="SnpSift CaseControl" version="3.6">
2 <description>Count how many samples are in 'case' and 'control' groups.</description>
3 <!--
4 To change the amount of memory used, adjust the -Xmx parameter (e.g. use -Xmx2G for 2 GB of memory)
5 -->
6 <expand macro="requirements" />
7 <macros>
8 <import>snpEff_macros.xml</import>
9 </macros>
10 <command>
11 java -Xmx1G -jar "\$SNPEFF_JAR_PATH/SnpSift.jar" caseControl -q
12 #if $name.__str__.strip() != '':
13 -name '$name'
14 #end if
15 #if $ctrl.ctrl_src == 'caseString':
16 '$ctrl.caseControlStr'
17 #else
18 -tfam '$ctrl.tfam'
19 #end if
20 '$input' > '$output'
21 </command>
22 <inputs> <!-- VCF input, case/control designation (inline string or TFAM file), and an optional tag-name suffix -->
23 <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/>
24 <conditional name="ctrl">
25 <param name="ctrl_src" type="select" label="Case Control defined in">
26 <option value="caseString">Case Control String</option>
27 <option value="tfam">TFAM file</option>
28 </param>
29 <when value="caseString">
30 <param name="caseControlStr" type="text" label="Case / Control column designation" size="50">
31 <help>
32 Case and control are defined by a string containing plus and minus symbols {'+', '-', '0'} where '+' is case, '-' is control and '0' is neutral
33 </help>
34 <validator type="regex" message="must be only plus(+), minus(-), or zero(0) characters">^[-+0]+$</validator> <!-- hyphen is listed first so it is a literal rather than a '+'..'0' range (which also admits ',', '.' and '/'); anchored so the whole value must match -->
35 </param>
36 </when>
37 <when value="tfam">
38 <param format="tabular" name="tfam" type="data" label="PLINK TFAM file" help="Read more about TFAM at http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#tr"/>
39 </when>
40 </conditional>
41 <param name="name" type="text" optional="true" label="name" help="name to append to the 'Cases' or 'Controls' tags">
42 <validator type="regex" message="Use only valid ID characters">^[_a-zA-Z0-9]+$</validator> <!-- anchored so trailing non-ID characters are rejected instead of passing on a valid prefix -->
43 </param>
44 </inputs>
45 <outputs> <!-- single annotated VCF written to the redirected stdout of the command -->
46 <data name="output" format="vcf"/>
47 </outputs>
48 <expand macro="stdio" />
49 <tests>
50 <test> <!-- test.private.01.vcf with a designation of two '-' characters: output must contain Cases=0,0,0 and Controls=0,0,0 -->
51 <param name="input" ftype="vcf" value="test.private.01.vcf"/>
52 <param name="ctrl_src" value="caseString"/>
53 <param name="caseControlStr" value="--"/>
54 <output name="output">
55 <assert_contents>
56 <has_text text="Cases=0,0,0;" />
57 <has_text text="Controls=0,0,0;" />
58 </assert_contents>
59 </output>
60 </test>
61
62 <test> <!-- test.private.02.vcf with the same two '-' designation: output must contain Cases=0,0,0 and Controls=2,0,4 -->
63 <param name="input" ftype="vcf" value="test.private.02.vcf"/>
64 <param name="ctrl_src" value="caseString"/>
65 <param name="caseControlStr" value="--"/>
66 <output name="output">
67 <assert_contents>
68 <has_text text="Cases=0,0,0;" />
69 <has_text text="Controls=2,0,4;" />
70 </assert_contents>
71 </output>
72 </test>
73
74 <test> <!-- test.private.02.vcf with an explicitly empty name and designation '-+': output must contain Cases=1,0,2 and Controls=1,0,2 -->
75 <param name="input" ftype="vcf" value="test.private.02.vcf"/>
76 <param name="name" value=""/>
77 <param name="ctrl_src" value="caseString"/>
78 <param name="caseControlStr" value="-+"/>
79 <output name="output">
80 <assert_contents>
81 <has_text text="Cases=1,0,2;" />
82 <has_text text="Controls=1,0,2;" />
83 </assert_contents>
84 </output>
85 </test>
86 </tests>
87 <help>
88
89 **SnpSift CaseControl**
90
91 Allows you to count how many samples are in 'case' group and a 'control' group. You can count 'homozygous', 'heterozygous' or 'any' variants.
92
93 Case and control are defined by a string containing plus and minus symbols {'+', '-', '0'} where '+' is case, '-' is control and '0' is neutral.
94
95 This command adds two annotations to the VCF file:
96
97 - **CaseControl**: Two comma-separated numbers representing the number of samples that have the variant in the case and the control group. Example:
98
99 "CaseControl=3,4" *the variant is present in 3 cases and 4 controls.*
100
101
102 - **CaseControlP**: A p-value (Fisher exact test) that the number of cases is N or more. Example:
103
104 "CaseControl=4,0;CaseControlP=3.030303e-02" *in this case the pValue of having 4 or more cases and zero controls is 0.03*
105
106
107 For example, if we have ten samples (which means ten genotype columns in the VCF file), the first four are 'case' and the last six are 'control', so the description string would be "++++------". Let's say we want to distinguish genotypes that are homozygous in 'case' and either homozygous or heterozygous in 'control'. We would set:
108
109 - Hom/Het case = "hom"
110
111 - Hom/Het control = "any"
112
113 - Case / Control column designation = "++++------"
114
115
116 @EXTERNAL_DOCUMENTATION@
117 http://snpeff.sourceforge.net/SnpSift.html#casecontrol
118
119 @CITATION_SECTION@
120
121 </help>
122 </tool>