Mercurial > repos > bcrain-completegenomics > testing4
comparison scripts/tools/cg_scripts/List_Unique_Variants.xml @ 0:951ae80a19fc draft
Uploaded
author | bcrain-completegenomics |
---|---|
date | Tue, 12 Jun 2012 14:42:04 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:951ae80a19fc |
---|---|
1 <tool id="pl_listuniquevariants" name="List_Unique_Variants" version="0.0.1"> | |
2 | |
3 <description>with annotations from gene or var files</description> <!--adds description in toolbar--> | |
4 | |
5 <command interpreter="perl"> <!--run the Perl script; the template branches below build its arguments from the selected file type and data source--> | |
6 #if $file_types.file_type =="var2" | |
7 List_Unique_Variants_2_1_0.pl --File_Type V --Output_File $output | |
8 --Var_Type $file_types.variants | |
9 $file_types.scoresVAF | |
10 $file_types.scoresEAF | |
11 $file_types.varQuality | |
12 #if $file_types.data_sources.data_source == "in" | |
13 #for $v in $file_types.data_sources.varfiles <!--one Input_File argument per var dataset in the repeat--> | |
14 --Input_File ${v.input} | |
15 #end for | |
16 #else | |
17 `cat $file_types.data_sources.varlist` | |
18 #end if | |
19 | |
20 #else if $file_types.file_type =="var1" | |
21 List_Unique_Variants_2_1_0.pl --File_Type V --Output_File $output | |
22 --Var_Type $file_types.variants | |
23 $file_types.scores | |
24 #if $file_types.data_sources.data_source == "in" | |
25 #for $v in $file_types.data_sources.varfiles <!--one Input_File argument per var dataset in the repeat--> | |
26 --Input_File ${v.input} | |
27 #end for | |
28 #else | |
29 `cat $file_types.data_sources.varlist` | |
30 #end if | |
31 | |
32 #else if $file_types.file_type =="gene" | |
33 List_Unique_Variants_2_1_0.pl --File_Type G --Output_File $output | |
34 --Var_Type $file_types.variants | |
35 --Component $file_types.component | |
36 --Impact $file_types.impact | |
37 #if $file_types.data_sources.data_source == "in" | |
38 #for $g in $file_types.data_sources.genefiles <!--one Input_File argument per gene dataset in the repeat--> | |
39 --Input_File ${g.input} | |
40 #end for | |
41 #else | |
42 `cat $file_types.data_sources.genelist` | |
43 #end if | |
44 #end if | |
45 </command> | |
46 | |
47 <outputs> | |
48 <data format="tabular" name="output" /> <!--tabular dataset; the command passes its path to the script as Output_File--> | |
49 </outputs> | |
50 | |
51 <inputs> | |
52 <conditional name="file_types"> | |
53 <!--selector for the input file type; its value picks the matching when-section below and the branch taken in the command template--> | |
54 <param name="file_type" type="select" label="Select the input file type"> | |
55 <option value="var2" selected="True">var files, format 2.x</option> | |
56 <option value="var1">var files, format 1.x</option> | |
57 <option value="gene">gene files</option> | |
58 </param> | |
59 | |
60 <when value="var2"> | |
61 <!--multi-select of variant types; the selection is passed to Var_Type in the command--> | |
62 <param name="variants" label="Select variant types to include" type="select" multiple="true" > | |
63 <!--<validator type="no_options" message="Please select at least one variant type."/>--> | |
64 <option value="All" selected="true">All</option> | |
65 <option value="snp">snp</option> | |
66 <option value="ins">ins</option> | |
67 <option value="del">del</option> | |
68 <option value="sub">sub</option> | |
69 <option value="ref">ref</option> | |
70 </param> | |
71 | |
72 <!--toggle for varScoreVAF output; each option value is the complete flag text that the command template inserts as-is--> | |
73 <param name="scoresVAF" type="select" label="Include varScoreVAF?"> | |
74 <option value="--Scores_VAF yes" selected="true">yes</option> | |
75 <option value="--Scores_VAF no">no</option> | |
76 </param> | |
77 <!--toggle for varScoreEAF output; option values are complete flag text, as above--> | |
78 <param name="scoresEAF" type="select" label="Include varScoreEAF?"> | |
79 <option value="--Scores_EAF yes" selected="true">yes</option> | |
80 <option value="--Scores_EAF no">no</option> | |
81 </param> | |
82 <!--toggle for varQuality output; option values are complete flag text, as above--> | |
83 <param name="varQuality" type="select" label="Include varQuality?"> | |
84 <option value="--Score_Qualities yes" selected="true">yes</option> | |
85 <option value="--Score_Qualities no">no</option> | |
86 </param> | |
87 | |
88 <!--choose between var datasets imported into Galaxy and a list file located outside Galaxy--> | |
89 <conditional name="data_sources"> | |
90 <param name="data_source" type="select" label="Where are the input var files?"> | |
91 <option value="in" selected="true">imported into Galaxy</option> | |
92 <option value="out">located outside Galaxy (available only for local Galaxy instances)</option> | |
93 </param> | |
94 <when value="in"> | |
95 <!--repeatable dataset selector; one entry per var file--> | |
96 <repeat name="varfiles" title="Variant files"> | |
97 <param name="input" type="data" format="cg_var" label="Dataset"> | |
98 <validator type="unspecified_build" /> | |
99 <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" | |
100 metadata_name="dbkey" metadata_column="1" | |
101 message="cgatools is not currently available for this build."/> | |
102 </param> | |
103 </repeat> | |
104 </when> | |
105 <when value="out"> | |
106 <!--path to a text file listing the var files; the command splices its contents in via cat--> | |
107 <param name="varlist" type="text" label="List of variant files (/path/file)" size="200" help="file with list of var files (/path/varfile), var files can be compressed (gz, bz2)."/> | |
108 </when> | |
109 </conditional> | |
110 </when> | |
111 | |
112 <when value="var1"> | |
113 <!--multi-select of variant types; the selection is passed to Var_Type in the command--> | |
114 <param name="variants" label="Select variant types to include" type="select" multiple="true" > | |
115 <!--<validator type="no_options" message="Please select at least one variant type."/>--> | |
116 <option value="All" selected="true">All</option> | |
117 <option value="snp">snp</option> | |
118 <option value="ins">ins</option> | |
119 <option value="del">del</option> | |
120 <option value="sub">sub</option> | |
121 <option value="ref">ref</option> | |
122 </param> | |
123 | |
124 <!--toggle for totalScore output; each option value is the complete flag text that the command template inserts as-is--> | |
125 <param name="scores" type="select" label="Include totalScore?"> | |
126 <option value="--Scores yes" selected="true">yes</option> | |
127 <option value="--Scores no">no</option> | |
128 </param> | |
129 | |
130 <!--choose between var datasets imported into Galaxy and a list file located outside Galaxy--> | |
131 <conditional name="data_sources"> | |
132 <param name="data_source" type="select" label="Where are the input var files?"> | |
133 <option value="in" selected="true">imported into Galaxy</option> | |
134 <option value="out">located outside Galaxy (available only for local Galaxy instances)</option> | |
135 </param> | |
136 <when value="in"> | |
137 <!--repeatable dataset selector; one entry per var file--> | |
138 <repeat name="varfiles" title="Variant files"> | |
139 <param name="input" type="data" format="cg_var" label="Dataset"> | |
140 <validator type="unspecified_build" /> | |
141 <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" | |
142 metadata_name="dbkey" metadata_column="1" | |
143 message="cgatools is not currently available for this build."/> | |
144 </param> | |
145 </repeat> | |
146 </when> | |
147 <when value="out"> | |
148 <!--path to a text file listing the var files; the command splices its contents in via cat--> | |
149 <param name="varlist" type="text" label="List of variant files (/path/file)" size="200" help="file with list of var files (/path/varfile), var files can be compressed (gz, bz2)."/> | |
150 </when> | |
151 </conditional> | |
152 </when> | |
153 | |
154 <when value="gene"> | |
155 <!--multi-select of variant types; the selection is passed to Var_Type in the command--> | |
156 <param name="variants" label="Select variant types to include" type="select" multiple="true" > | |
157 <!--<validator type="no_options" message="Please select at least one variant type."/>--> | |
158 <option value="All" selected="true">All</option> | |
159 <option value="snp">snp</option> | |
160 <option value="ins">ins</option> | |
161 <option value="del">del</option> | |
162 <option value="sub">sub</option> | |
163 <option value="ref">ref</option> | |
164 </param> | |
165 | |
166 <!--multi-select of gene-file component values; the selection is passed to Component in the command--> | |
167 <param name="component" type="select" label="Select component types to include" multiple="true" > | |
168 <option value="All" selected="true">All</option> | |
169 <option value="CDS">CDS</option> | |
170 <option value="INTRON">INTRON</option> | |
171 <option value="DONOR">DONOR</option> | |
172 <option value="ACCEPTOR">ACCEPTOR</option> | |
173 <option value="TSS-UPSTREAM">TSS-UPSTREAM</option> | |
174 <option value="SPAN5">SPAN5</option> | |
175 <option value="SPAN3">SPAN3</option> | |
176 <option value="SPAN">SPAN</option> | |
177 <option value="UTR5">UTR5</option> | |
178 <option value="UTR3">UTR3</option> | |
179 <option value="UTR">UTR</option> | |
180 </param> | |
181 | |
182 <!--multi-select of gene-file impact values; the selection is passed verbatim to Impact in the command, so each value must be spelled exactly as it appears in the gene file--> | |
183 <param name="impact" type="select" label="Select impact types to include" multiple="true" > | |
184 <option value="All" selected="true">All</option> | |
185 <option value="NO-CHANGE">NO-CHANGE</option> | |
186 <option value="SYNONYMOUS">SYNONYMOUS</option> | |
187 <option value="MISSENSE">MISSENSE</option> | |
188 <option value="NONSENSE">NONSENSE</option> | |
189 <option value="NONSTOP">NONSTOP</option> | |
190 <option value="DELETE">DELETE</option> | |
191 <option value="INSERT">INSERT</option> | |
192 <option value="DELETE+">DELETE+</option> | |
193 <option value="INSERT+">INSERT+</option> | |
194 <option value="FRAMESHIFT">FRAMESHIFT</option> | |
195 <option value="MISSTART">MISSTART</option> | |
196 <option value="DISRUPT">DISRUPT</option> | |
197 <option value="UNKNOWN-VNC">UNKNOWN-VNC</option> | |
198 <option value="UNKNOWN-INC">UNKNOWN-INC</option> | |
199 <option value="UNKNOWN-TR">UNKNOWN-TR</option> | |
200 </param> | |
201 | |
202 <!--choose between gene datasets imported into Galaxy and a list file located outside Galaxy--> | |
203 <conditional name="data_sources"> | |
204 <param name="data_source" type="select" label="Where are the input gene files?"> | |
205 <option value="in" selected="true">imported into Galaxy</option> | |
206 <option value="out">located outside Galaxy (available only for local Galaxy instances)</option> | |
207 </param> | |
208 <when value="in"> | |
209 <!--repeatable dataset selector; one entry per gene file--> | |
210 <repeat name="genefiles" title="Gene files"> | |
211 <param name="input" type="data" format="cg_gene" label="Dataset"> | |
212 <validator type="unspecified_build" /> | |
213 <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc" | |
214 metadata_name="dbkey" metadata_column="1" | |
215 message="cgatools is not currently available for this build."/> | |
216 </param> | |
217 </repeat> | |
218 </when> | |
219 <when value="out"> | |
220 <!--path to a text file listing the gene files; the command splices its contents in via cat--> | |
221 <param name="genelist" type="text" label="List of gene files (/path/file)" size="200" help="file with list of gene files (/path/genefile), gene files can be compressed (gz, bz2)."/> | |
222 </when> | |
223 </conditional> | |
224 </when> | |
225 | |
226 </conditional> | |
227 </inputs> | |
228 | |
229 | |
230 <help> | |
231 | |
232 **What it does** | |
233 | |
234 This tool identifies all called variants present in the var or gene files and generates an annotated variant list. | |
235 | |
236 ----- | |
237 | |
238 **Instructions**:: | |
239 | |
240 List Unique Variants for Pipeline 1.x and 2.x | |
241 [Uses header if available, checks for position of xref field if not] | |
242 Take one or more var or gene files | |
243 Extract a non-redundant set of variants | |
244 | |
245 For var files: | |
246 The fields used to define non-redundant variants are: | |
247 chromosome begin end varType reference alleleSeq xRef | |
248 User can nominate class(es) of varType to filter on | |
249 Outputs varScoreEAF, varScoreVAF and varQuality as a default but user can turn | |
250 them off (separately) | |
251 Scores and qualities stored in separate fields, all values for a variant across | |
252 a set of genomes. | |
253 Values for different genomes separated by ':', for two hom entries for the same | |
254 genome by '|' | |
255 Output is accepted by testvariants to generate a variant table, all fields kept | |
256 in testvariants output | |
257 | |
258 For gene files: | |
259 The fields used to define non-redundant gene variants are: | |
260 chromosome begin end varType reference call xRef geneId mrnaAcc proteinAcc symbol | |
261 orientation component componentIndex codingRegionKnown impact nucleotidePos | |
262 proteinPos annotationRefSequence sampleSequence genomeRefSequence | |
263 User can nominate class(es) of varType, component or impact to filter on | |
264 All gene entries kept ie multiple entries if multiple transcripts | |
265 | |
266 NB Now treating xref as a separate component in var recs, as it is not consistent | |
267 between X and Y vars | |
268 Not fixed for gene recs yet | |
269 | |
270 perl List_Unique_Variants_2_1_0.pl | |
271 --File_Type [V|G] | |
272 --Input_File input_file_1 [set of var or gene files] | |
273 --Input_File input_file_2 | |
274 ... | |
275 --Input_File input_file_n | |
276 --Output_File filename | |
277 --Var_Type [For both file types, 'All' or any value from the varType field, | |
278 multiple values allowed, separated by comma] | |
279 --Component [Gene file specific, 'All' or any value from component field of gene | |
280 file, multiple allowed; 'All' is default] | |
281 --Impact All [Gene file specific, 'All' or any value from impact field of gene | |
282 file, multiple allowed; 'All' is default] | |
283 --Scores [1.x var file specific, yes|no, yes is default] | |
284 --Scores_VAF [2.0 var file specific, yes|no, yes is default] | |
285 --Scores_EAF [2.0 var file specific, yes|no, yes is default] | |
286 --Score_Qualities [yes|no, yes is default] | |
287 eg | |
288 perl List_Unique_Variants_2_1_0.pl \ | |
289 --File_Type G \ | |
290 --Input_File /Yoruban_Trio_1100_37/GS19238-1100-37/GS00028-DNA_A01/ASM/gene-GS19238-1100-37-ASM.tsv.bz2 \ | |
291 --Input_File /Yoruban_Trio_1100_37/GS19239-1100-37/GS00028-DNA_B01/ASM/gene-GS19239-1100-37-ASM.tsv.bz2 \ | |
292 --Input_File /Yoruban_Trio_1100_37/GS19240-1100-37/GS00028-DNA_C01/ASM/gene-GS19240-1100-37-ASM.tsv.bz2 \ | |
293 --Output_File /Users/rtearle/Documents/TBF/YRI_Trio_Protein_Coding.tsv \ | |
294 --Var_Type All \ | |
295 --Component All \ | |
296 --Impact All \ | |
297 --Scores_VAF yes \ | |
298 --Scores_EAF yes \ | |
299 --Score_Qualities yes | |
300 | |
301 var fields | |
302 1.x locus ploidy haplotype chromosome begin end varType reference alleleSeq | |
303 totalScore hapLink xRef | |
304 2.0 locus ploidy allele chromosome begin end varType reference alleleSeq | |
305 varScoreVAF varScoreEAF varQuality hapLink xRef | |
306 | |
307 gene fields | |
308 1.x index locus allele chromosome begin end varType reference call xRef geneId | |
309 mrnaAcc proteinAcc symbol orientation component componentIndex | |
310 codingRegionKnown impact nucleotidePos proteinPos annotationRefSequence | |
311 sampleSequence genomeRefSequence | |
312 2.0 index locus allele chromosome begin end varType reference call xRef geneId | |
313 mrnaAcc proteinAcc symbol orientation component componentIndex hasCodingRegion | |
314 impact nucleotidePos proteinPos annotationRefSequence sampleSequence | |
315 genomeRefSequence pfam | |
316 | |
317 Parsing and storing input parameters | |
318 Only input_file fields can be repeated | |
319 Input parameters are case insensitive | |
320 | |
321 | |
322 </help> | |
323 </tool> |