comparison scripts/tools/cg_scripts/List_Unique_Variants.xml @ 0:951ae80a19fc draft

Uploaded
author bcrain-completegenomics
date Tue, 12 Jun 2012 14:42:04 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:951ae80a19fc
1 <tool id="pl_listuniquevariants" name="List_Unique_Variants" version="0.0.1">
2
3 <description>with annotations from gene or var files</description> <!--adds description in toolbar-->
4
5 <command interpreter="perl"> <!--Cheetah template that assembles the List_Unique_Variants_2_1_0.pl command line; one branch per selected file type (var2, var1, gene). NOTE(review): the "out" branches splice the contents of a user-typed path into the command via backtick cat; confirm this input is sanitized-->
6 #if $file_types.file_type =="var2"
7 List_Unique_Variants_2_1_0.pl --File_Type V --Output_File $output
8 --Var_Type $file_types.variants
9 $file_types.scoresVAF
10 $file_types.scoresEAF
11 $file_types.varQuality
12 #if $file_types.data_sources.data_source == "in"
13 #for $v in $file_types.data_sources.varfiles <!--emit one Input_File option per var dataset in the repeat-->
14 --Input_File ${v.input}
15 #end for
16 #else
17 `cat $file_types.data_sources.varlist`
18 #end if
19
20 #else if $file_types.file_type =="var1"
21 List_Unique_Variants_2_1_0.pl --File_Type V --Output_File $output
22 --Var_Type $file_types.variants
23 $file_types.scores
24 #if $file_types.data_sources.data_source == "in"
25 #for $v in $file_types.data_sources.varfiles <!--emit one Input_File option per var dataset in the repeat-->
26 --Input_File ${v.input}
27 #end for
28 #else
29 `cat $file_types.data_sources.varlist`
30 #end if
31
32 #else if $file_types.file_type =="gene"
33 List_Unique_Variants_2_1_0.pl --File_Type G --Output_File $output
34 --Var_Type $file_types.variants
35 --Component $file_types.component
36 --Impact $file_types.impact
37 #if $file_types.data_sources.data_source == "in"
38 #for $g in $file_types.data_sources.genefiles <!--emit one Input_File option per gene dataset in the repeat-->
39 --Input_File ${g.input}
40 #end for
41 #else
42 `cat $file_types.data_sources.genelist`
43 #end if
44 #end if
45 </command>
46
47 <outputs>
48 <data format="tabular" name="output" />
49 </outputs>
50
51 <inputs>
52 <conditional name="file_types">
53 <!--form field to select file type-->
54 <param name="file_type" type="select" label="Select the input file type">
55 <option value="var2" selected="True">var files, format 2.x</option>
56 <option value="var1">var files, format 1.x</option>
57 <option value="gene">gene files</option>
58 </param>
59
60 <when value="var2">
61 <!--form field to select all variant types to annotate-->
62 <param name="variants" label="Select variant types to include" type="select" multiple="true" >
63 <!--<validator type="no_options" message="Please select at least one variant type."/>-->
64 <option value="All" selected="true">All</option>
65 <option value="snp">snp</option>
66 <option value="ins">ins</option>
67 <option value="del">del</option>
68 <option value="sub">sub</option>
69 <option value="ref">ref</option>
70 </param>
71
72 <!--form field to select varScoresVAF-->
73 <param name="scoresVAF" type="select" label="Include varScoreVAF?">
74 <option value="--Scores_VAF yes" selected="true">yes</option>
75 <option value="--Scores_VAF no">no</option>
76 </param>
77 <!--form field to select varScoresEAF-->
78 <param name="scoresEAF" type="select" label="Include varScoreEAF?">
79 <option value="--Scores_EAF yes" selected="true">yes</option>
80 <option value="--Scores_EAF no">no</option>
81 </param>
82 <!--form field to select varQuality-->
83 <param name="varQuality" type="select" label="Include varQuality?">
84 <option value="--Score_Qualities yes" selected="true">yes</option>
85 <option value="--Score_Qualities no">no</option>
86 </param>
87
88 <!--conditional to select variant file input-->
89 <conditional name="data_sources">
90 <param name="data_source" type="select" label="Where are the input var files?">
91 <option value="in" selected="true">imported into Galaxy</option>
92 <option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
93 </param>
94 <when value="in">
95 <!--repeatable form field: one var dataset per entry-->
96 <repeat name="varfiles" title="Variant files">
97 <param name="input" type="data" format="cg_var" label="Dataset">
98 <validator type="unspecified_build" />
99 <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
100 metadata_name="dbkey" metadata_column="1"
101 message="cgatools is not currently available for this build."/>
102 </param>
103 </repeat>
104 </when>
105 <when value="out">
106 <!--form field for the path to a file that lists the var files-->
107 <param name="varlist" type="text" label="List of variant files (/path/file)" size="200" help="file with list of var files (/path/varfile), var files can be compressed (gz, bz2)."/>
108 </when>
109 </conditional>
110 </when>
111
112 <when value="var1">
113 <!--form field to select all variant types to annotate-->
114 <param name="variants" label="Select variant types to include" type="select" multiple="true" >
115 <!--<validator type="no_options" message="Please select at least one variant type."/>-->
116 <option value="All" selected="true">All</option>
117 <option value="snp">snp</option>
118 <option value="ins">ins</option>
119 <option value="del">del</option>
120 <option value="sub">sub</option>
121 <option value="ref">ref</option>
122 </param>
123
124 <!--form field to select scores-->
125 <param name="scores" type="select" label="Include totalScore?">
126 <option value="--Scores yes" selected="true">yes</option>
127 <option value="--Scores no">no</option>
128 </param>
129
130 <!--conditional to select variant file input-->
131 <conditional name="data_sources">
132 <param name="data_source" type="select" label="Where are the input var files?">
133 <option value="in" selected="true">imported into Galaxy</option>
134 <option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
135 </param>
136 <when value="in">
137 <!--repeatable form field: one var dataset per entry-->
138 <repeat name="varfiles" title="Variant files">
139 <param name="input" type="data" format="cg_var" label="Dataset">
140 <validator type="unspecified_build" />
141 <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
142 metadata_name="dbkey" metadata_column="1"
143 message="cgatools is not currently available for this build."/>
144 </param>
145 </repeat>
146 </when>
147 <when value="out">
148 <!--form field for the path to a file that lists the var files-->
149 <param name="varlist" type="text" label="List of variant files (/path/file)" size="200" help="file with list of var files (/path/varfile), var files can be compressed (gz, bz2)."/>
150 </when>
151 </conditional>
152 </when>
153
154 <when value="gene">
155 <!--form field to select all variant types to annotate-->
156 <param name="variants" label="Select variant types to include" type="select" multiple="true" >
157 <!--<validator type="no_options" message="Please select at least one variant type."/>-->
158 <option value="All" selected="true">All</option>
159 <option value="snp">snp</option>
160 <option value="ins">ins</option>
161 <option value="del">del</option>
162 <option value="sub">sub</option>
163 <option value="ref">ref</option>
164 </param>
165
166 <!--form field to select component in gene file-->
167 <param name="component" type="select" label="Select component types to include" multiple="true" >
168 <option value="All" selected="true">All</option>
169 <option value="CDS">CDS</option>
170 <option value="INTRON">INTRON</option>
171 <option value="DONOR">DONOR</option>
172 <option value="ACCEPTOR">ACCEPTOR</option>
173 <option value="TSS-UPSTREAM">TSS-UPSTREAM</option>
174 <option value="SPAN5">SPAN5</option>
175 <option value="SPAN3">SPAN3</option>
176 <option value="SPAN">SPAN</option>
177 <option value="UTR5">UTR5</option>
178 <option value="UTR3">UTR3</option>
179 <option value="UTR">UTR</option>
180 </param>
181
182 <!--form field to select impact types; each option value is passed to the Impact option and has to match a value of the impact field in the gene file-->
183 <param name="impact" type="select" label="Select impact types to include" multiple="true" >
184 <option value="All" selected="true">All</option>
185 <option value="NO-CHANGE">NO-CHANGE</option>
186 <option value="SYNONYMOUS">SYNONYMOUS</option>
187 <option value="MISSENSE">MISSENSE</option>
188 <option value="NONSENSE">NONSENSE</option>
189 <option value="NONSTOP">NONSTOP</option>
190 <option value="DELETE">DELETE</option>
191 <option value="INSERT">INSERT</option>
192 <option value="DELETE+">DELETE+</option>
193 <option value="INSERT+">INSERT+</option>
194 <option value="FRAMESHIFT">FRAMESHIFT</option>
195 <option value="MISSTART">MISSTART</option>
196 <option value="DISRUPT">DISRUPT</option>
197 <option value="UNKNOWN-VNC">UNKNOWN-VNC</option>
198 <option value="UNKNOWN-INC">UNKNOWN-INC</option>
199 <option value="UNKNOWN-TR">UNKNOWN-TR</option>
200 </param>
201
202 <!--conditional to select gene file input-->
203 <conditional name="data_sources">
204 <param name="data_source" type="select" label="Where are the input gene files?">
205 <option value="in" selected="true">imported into Galaxy</option>
206 <option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
207 </param>
208 <when value="in">
209 <!--repeatable form field: one gene dataset per entry-->
210 <repeat name="genefiles" title="Gene files">
211 <param name="input" type="data" format="cg_gene" label="Dataset">
212 <validator type="unspecified_build" />
213 <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
214 metadata_name="dbkey" metadata_column="1"
215 message="cgatools is not currently available for this build."/>
216 </param>
217 </repeat>
218 </when>
219 <when value="out">
220 <!--form field for the path to a file that lists the gene files-->
221 <param name="genelist" type="text" label="List of gene files (/path/file)" size="200" help="file with list of gene files (/path/genefile), gene files can be compressed (gz, bz2)."/>
222 </when>
223 </conditional>
224 </when>
225
226 </conditional>
227 </inputs>
228
229
230 <help>
231
232 **What it does**
233
234 This tool identifies all called variants present in the var or gene files and generates an annotated variant list.
235
236 -----
237
238 **Instructions**::
239
240 List Unique Variants for Pipeline 1.x and 2.x
241 [Uses header if available, checks for position of xref field if not]
242 Take one or more var or gene files
243 Extract a non-redundant set of variants
244
245 For var files:
246 The fields used to define non-redundant variants are:
247 chromosome begin end varType reference alleleSeq xRef
248 User can nominate class(es) of varType to filter on
249 Outputs varScoreEAF, varScoreVAF and varQuality as a default but user can turn
250 them off (separately)
251 Scores and qualities stored in separate fields, all values for a variant across
252 a set of genomes.
253 Values for different genomes separated by ':', for two hom entries for the same
254 genome by '|'
255 Output is accepted by testvariants to generate a variant table, all fields kept
256 in testvariants output
257
258 For gene files:
259 The fields used to define non-redundant gene variants are:
260 chromosome begin end varType reference call xRef geneId mrnaAcc proteinAcc symbol
261 orientation component componentIndex codingRegionKnown impact nucleotidePos
262 proteinPos annotationRefSequence sampleSequence genomeRefSequence
263 User can nominate class(es) of varType, component or impact to filter on
264 All gene entries kept ie multiple entries if multiple transcripts
265
266 NB Now treating xref as a separate component in var recs, as it is not consistent
267 between X and Y vars
268 Not fixed for gene recs yet
269
270 perl List_Unique_Variants_2_1_0.pl
271 --File_Type [V|G]
272 --Input_File input_file_1 [set of var or gene files]
273 --Input_File input_file_2
274 ...
275 --Input_File input_file_n
276 --Output_File filename
277 --Var_Type [For both file types, 'All' or any value from the varType field,
278 multiple values allowed, separated by comma]
279 --Component [Gene file specific,'All' or any value from component field of gene
280 file, multiple allowed; 'All' is default]
281 --Impact All [Gene file specific,'All' or any value from impact field of gene
282 file, multiple allowed; 'All' is default]
283 --Scores [1.x var file specific, yes|no, yes is default]
284 --Scores_VAF [2.0 var file specific, yes|no, yes is default]
285 --Scores_EAF [2.0 var file specific, yes|no, yes is default]
286 --Score_Qualities [yes|no, yes is default]
287 eg
288 perl List_Unique_Variants_2_1_0.pl \
289 --File_Type V \
290 --Input_File /Yoruban_Trio_1100_37/GS19238-1100-37/GS00028-DNA_A01/ASM/gene-GS19238-1100-37-ASM.tsv.bz2 \
291 --Input_File /Yoruban_Trio_1100_37/GS19239-1100-37/GS00028-DNA_B01/ASM/gene-GS19239-1100-37-ASM.tsv.bz2 \
292 --Input_File /Yoruban_Trio_1100_37/GS19240-1100-37/GS00028-DNA_C01/ASM/gene-GS19240-1100-37-ASM.tsv.bz2 \
293 --Output_File /Users/rtearle/Documents/TBF/YRI_Trio_Protein_Coding.tsv \
294 --Var_Type All \
295 --Component All \
296 --Impact All \
297 --Scores_VAF yes \
298 --Scores_EAF yes \
299 --Score_Qualities yes
300
301 var fields
302 1.x locus ploidy haplotype chromosome begin end varType reference alleleSeq
303 totalScore hapLink xRef
304 2.0 locus ploidy allele chromosome begin end varType reference alleleSeq
305 varScoreVAF varScoreEAF varQuality hapLink xRef
306
307 gene fields
308 1.x index locus allele chromosome begin end varType reference call xRef geneId
309 mrnaAcc proteinAcc symbol orientation component componentIndex
310 codingRegionKnown impact nucleotidePos proteinPos annotationRefSequence
311 sampleSequence genomeRefSequence
312 2.0 index locus allele chromosome begin end varType reference call xRef geneId
313 mrnaAcc proteinAcc symbol orientation component componentIndex hasCodingRegion
314 impact nucleotidePos proteinPos annotationRefSequence sampleSequence
315 genomeRefSequence pfam
316
317 Parsing and storing input parameters
318 Only input_file fields can be repeated
319 input parameters are case insensitive
320
321
322 </help>
323 </tool>