0
|
1 <tool id="pl_listuniquevariants" name="List_Unique_Variants" version="0.0.1">
|
|
2
|
|
3 <description>with annotations from gene or var files</description> <!--adds description in toolbar-->
|
|
4
|
|
<!-- Command template for List_Unique_Variants_2_1_0.pl.
     The file-type selector picks one of three branches (var 2.x, var 1.x,
     gene); the argument order inside each branch is unchanged.
     The output path, every dataset path and the list-file path are
     single-quoted so that each reaches the script (or cat) as one word,
     even when it contains spaces or shell metacharacters. -->
<command interpreter="perl">
#if $file_types.file_type == "var2"
    List_Unique_Variants_2_1_0.pl --File_Type V --Output_File '$output'
    --Var_Type $file_types.variants
    ## each select value below already holds its flag plus yes/no
    $file_types.scoresVAF
    $file_types.scoresEAF
    $file_types.varQuality
    #if $file_types.data_sources.data_source == "in"
        ## one Input_File argument per var dataset in the repeat
        #for $v in $file_types.data_sources.varfiles
            --Input_File '${v.input}'
        #end for
    #else
        ## the contents of the list file are spliced into the argument list
        `cat '$file_types.data_sources.varlist'`
    #end if

#else if $file_types.file_type == "var1"
    List_Unique_Variants_2_1_0.pl --File_Type V --Output_File '$output'
    --Var_Type $file_types.variants
    ## the select value already holds its flag plus yes/no
    $file_types.scores
    #if $file_types.data_sources.data_source == "in"
        ## one Input_File argument per var dataset in the repeat
        #for $v in $file_types.data_sources.varfiles
            --Input_File '${v.input}'
        #end for
    #else
        ## the contents of the list file are spliced into the argument list
        `cat '$file_types.data_sources.varlist'`
    #end if

#else if $file_types.file_type == "gene"
    List_Unique_Variants_2_1_0.pl --File_Type G --Output_File '$output'
    --Var_Type $file_types.variants
    --Component $file_types.component
    --Impact $file_types.impact
    #if $file_types.data_sources.data_source == "in"
        ## one Input_File argument per gene dataset in the repeat
        #for $g in $file_types.data_sources.genefiles
            --Input_File '${g.input}'
        #end for
    #else
        ## the contents of the list file are spliced into the argument list
        `cat '$file_types.data_sources.genelist'`
    #end if
#end if
</command>
|
|
46
|
|
<!-- Single tabular result; the command template refers to it as $output. -->
<outputs>
    <data name="output" format="tabular"/>
</outputs>
|
|
50
|
|
51 <inputs>
|
|
52 <conditional name="file_types">
|
|
<!-- Input file type selector. Its value chooses which <when> section of
     this conditional is shown and which branch the command template takes. -->
<param name="file_type"
       type="select"
       label="Select the input file type">
    <option value="var2" selected="True">var files, format 2.x</option>
    <option value="var1">var files, format 1.x</option>
    <option value="gene">gene files</option>
</param>
|
|
59
|
|
<!-- Parameters offered for var files in format 2.x. -->
<when value="var2">
    <!-- Variant types to keep; the selection feeds the Var_Type argument. -->
    <param name="variants" type="select" multiple="true" label="Select variant types to include">
        <!-- Disabled validator, kept for reference:
             <validator type="no_options" message="Please select at least one variant type."/> -->
        <option value="All" selected="true">All</option>
        <option value="snp">snp</option>
        <option value="ins">ins</option>
        <option value="del">del</option>
        <option value="sub">sub</option>
        <option value="ref">ref</option>
    </param>

    <!-- Score and quality toggles. Each option value is the complete
         flag-and-answer text that the command template inserts as-is. -->
    <param name="scoresVAF" type="select" label="Include varScoreVAF?">
        <option value="--Scores_VAF yes" selected="true">yes</option>
        <option value="--Scores_VAF no">no</option>
    </param>
    <param name="scoresEAF" type="select" label="Include varScoreEAF?">
        <option value="--Scores_EAF yes" selected="true">yes</option>
        <option value="--Scores_EAF no">no</option>
    </param>
    <param name="varQuality" type="select" label="Include varQuality?">
        <option value="--Score_Qualities yes" selected="true">yes</option>
        <option value="--Score_Qualities no">no</option>
    </param>

    <!-- Source of the var files: Galaxy datasets or a list file outside Galaxy. -->
    <conditional name="data_sources">
        <param name="data_source" type="select" label="Where are the input var files?">
            <option value="in" selected="true">imported into Galaxy</option>
            <option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
        </param>
        <when value="in">
            <!-- One entry per var dataset; each becomes an Input_File argument. -->
            <repeat name="varfiles" title="Variant files">
                <param name="input" type="data" format="cg_var" label="Dataset">
                    <validator type="unspecified_build"/>
                    <!-- The dataset's dbkey is checked against column 1 of cg_crr_files.loc. -->
                    <validator type="dataset_metadata_in_file"
                               filename="cg_crr_files.loc"
                               metadata_name="dbkey"
                               metadata_column="1"
                               message="cgatools is not currently available for this build."/>
                </param>
            </repeat>
        </when>
        <when value="out">
            <!-- Path to a list file; the command template expands its contents with cat. -->
            <param name="varlist" type="text" size="200" label="List of variant files (/path/file)" help="file with list of var files (/path/varfile), var files can be compressed (gz, bz2)."/>
        </when>
    </conditional>
</when>
|
|
111
|
|
<!-- Parameters offered for var files in format 1.x. -->
<when value="var1">
    <!-- Variant types to keep; the selection feeds the Var_Type argument. -->
    <param name="variants" type="select" multiple="true" label="Select variant types to include">
        <!-- Disabled validator, kept for reference:
             <validator type="no_options" message="Please select at least one variant type."/> -->
        <option value="All" selected="true">All</option>
        <option value="snp">snp</option>
        <option value="ins">ins</option>
        <option value="del">del</option>
        <option value="sub">sub</option>
        <option value="ref">ref</option>
    </param>

    <!-- totalScore toggle. The option value is the complete flag-and-answer
         text that the command template inserts as-is. -->
    <param name="scores" type="select" label="Include totalScore?">
        <option value="--Scores yes" selected="true">yes</option>
        <option value="--Scores no">no</option>
    </param>

    <!-- Source of the var files: Galaxy datasets or a list file outside Galaxy. -->
    <conditional name="data_sources">
        <param name="data_source" type="select" label="Where are the input var files?">
            <option value="in" selected="true">imported into Galaxy</option>
            <option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
        </param>
        <when value="in">
            <!-- One entry per var dataset; each becomes an Input_File argument. -->
            <repeat name="varfiles" title="Variant files">
                <param name="input" type="data" format="cg_var" label="Dataset">
                    <validator type="unspecified_build"/>
                    <!-- The dataset's dbkey is checked against column 1 of cg_crr_files.loc. -->
                    <validator type="dataset_metadata_in_file"
                               filename="cg_crr_files.loc"
                               metadata_name="dbkey"
                               metadata_column="1"
                               message="cgatools is not currently available for this build."/>
                </param>
            </repeat>
        </when>
        <when value="out">
            <!-- Path to a list file; the command template expands its contents with cat. -->
            <param name="varlist" type="text" size="200" label="List of variant files (/path/file)" help="file with list of var files (/path/varfile), var files can be compressed (gz, bz2)."/>
        </when>
    </conditional>
</when>
|
|
153
|
|
<!-- Parameters offered for gene files. -->
<when value="gene">
    <!-- Variant types to keep; the selection feeds the Var_Type argument. -->
    <param name="variants" type="select" multiple="true" label="Select variant types to include">
        <!-- Disabled validator, kept for reference:
             <validator type="no_options" message="Please select at least one variant type."/> -->
        <option value="All" selected="true">All</option>
        <option value="snp">snp</option>
        <option value="ins">ins</option>
        <option value="del">del</option>
        <option value="sub">sub</option>
        <option value="ref">ref</option>
    </param>

    <!-- Gene components to keep; the selection feeds the Component argument. -->
    <param name="component" type="select" multiple="true" label="Select component types to include">
        <option value="All" selected="true">All</option>
        <option value="CDS">CDS</option>
        <option value="INTRON">INTRON</option>
        <option value="DONOR">DONOR</option>
        <option value="ACCEPTOR">ACCEPTOR</option>
        <option value="TSS-UPSTREAM">TSS-UPSTREAM</option>
        <option value="SPAN5">SPAN5</option>
        <option value="SPAN3">SPAN3</option>
        <option value="SPAN">SPAN</option>
        <option value="UTR5">UTR5</option>
        <option value="UTR3">UTR3</option>
        <option value="UTR">UTR</option>
    </param>

    <!-- Impact classes to keep; the selection feeds the Impact argument.
         The script filters on values taken from the gene file's impact
         field, so each option must be spelled exactly as it appears there.
         MISSENSE and NONSTOP were previously misspelled (MISSENES,
         NONSSTOP) and therefore could never match any record. -->
    <param name="impact" type="select" multiple="true" label="Select impact types to include">
        <option value="All" selected="true">All</option>
        <option value="NO-CHANGE">NO-CHANGE</option>
        <option value="SYNONYMOUS">SYNONYMOUS</option>
        <option value="MISSENSE">MISSENSE</option>
        <option value="NONSENSE">NONSENSE</option>
        <option value="NONSTOP">NONSTOP</option>
        <option value="DELETE">DELETE</option>
        <option value="INSERT">INSERT</option>
        <option value="DELETE+">DELETE+</option>
        <option value="INSERT+">INSERT+</option>
        <option value="FRAMESHIFT">FRAMESHIFT</option>
        <option value="MISSTART">MISSTART</option>
        <option value="DISRUPT">DISRUPT</option>
        <option value="UNKNOWN-VNC">UNKNOWN-VNC</option>
        <option value="UNKNOWN-INC">UNKNOWN-INC</option>
        <option value="UNKNOWN-TR">UNKNOWN-TR</option>
    </param>

    <!-- Source of the gene files: Galaxy datasets or a list file outside Galaxy. -->
    <conditional name="data_sources">
        <param name="data_source" type="select" label="Where are the input gene files?">
            <option value="in" selected="true">imported into Galaxy</option>
            <option value="out">located outside Galaxy (available only for local Galaxy instances)</option>
        </param>
        <when value="in">
            <!-- One entry per gene dataset; each becomes an Input_File argument. -->
            <repeat name="genefiles" title="Gene files">
                <param name="input" type="data" format="cg_gene" label="Dataset">
                    <validator type="unspecified_build"/>
                    <!-- The dataset's dbkey is checked against column 1 of cg_crr_files.loc. -->
                    <validator type="dataset_metadata_in_file"
                               filename="cg_crr_files.loc"
                               metadata_name="dbkey"
                               metadata_column="1"
                               message="cgatools is not currently available for this build."/>
                </param>
            </repeat>
        </when>
        <when value="out">
            <!-- Path to a list file; the command template expands its contents with cat. -->
            <param name="genelist" type="text" size="200" label="List of gene files (/path/file)" help="file with list of gene files (/path/genefile), gene files can be compressed (gz, bz2)."/>
        </when>
    </conditional>
</when>
|
|
225
|
|
226 </conditional>
|
|
227 </inputs>
|
|
228
|
|
229
|
|
230 <help>
|
|
231
|
|
232 **What it does**
|
|
233
|
|
234 This tool identifies all called variants present in the var or gene files and generates an annotated variant list.
|
|
235
|
|
236 -----
|
|
237
|
|
238 **Instructions**::
|
|
239
|
|
240 List Unique Variants for Pipeline 1.x and 2.x
|
|
241 [Uses header if available, checks for position of xref field if not]
|
|
242 Take one or more var or gene files
|
|
243 Extract a non-redundant set of variants
|
|
244
|
|
245 For var files:
|
|
246 The fields used to define non-redundant variants are:
|
|
247 chromosome begin end varType reference alleleSeq xRef
|
|
248 User can nominate class(es) of varType to filter on
|
|
249 Outputs varScoreEAF, varScoreVAF and varQuality as a default but user can turn
|
|
250 them off (separately)
|
|
251 Scores and qualities stored in separate fields, all values for a variant across
|
|
252 a set of genomes.
|
|
253 Values for different genomes separated by ':', for two hom entries for the same
|
|
254 genome by '|'
|
|
255 Output is accepted by testvariants to generate a variant table, all fields kept
|
|
256 in testvariants output
|
|
257
|
|
258 For gene files:
|
|
259 The fields used to define non-redundant gene variants are:
|
|
260 chromosome begin end varType reference call xRef geneId mrnaAcc proteinAcc symbol
|
|
261 orientation component componentIndex codingRegionKnown impact nucleotidePos
|
|
262 proteinPos annotationRefSequence sampleSequence genomeRefSequence
|
|
263 User can nominate class(es) of varType, component or impact to filter on
|
|
264 All gene entries kept ie multiple entries if multiple transcripts
|
|
265
|
|
266 NB Now treating xref as a separate component in var recs, as it is not consistent
|
|
267 between X and Y vars
|
|
268 Not fixed for gene recs yet
|
|
269
|
|
270 perl List_Unique_Variants_2_1_0.pl
|
|
271 --File_Type [V|G]
|
|
272 --Input_File input_file_1 [set of var or gene files]
|
|
273 --Input_File input_file_2
|
|
274 ...
|
|
275 --Input_File input_file_n
|
|
276 --Output_File filename
|
|
277 --Var_Type [For both file types, 'All' or any value from the varType field,
|
|
278 multiple values allowed, separated by comma]
|
|
279 --Component [Gene file specific,'All' or any value from component field of gene
|
|
280 file, multiple allowed; 'All' is default]
|
|
281 --Impact All [Gene file specific,'All' or any value from impact field of gene
|
|
282 file, multiple allowed; 'All' is default]
|
|
283 --Scores [1.x var file specific, yes|no, yes is default]
|
|
284 --Scores_VAF [2.0 var file specific, yes|no, yes is default]
|
|
285 --Scores_EAF [2.0 var file specific, yes|no, yes is default]
|
|
286 --Score_Qualities [yes|no, yes is default]
|
|
287 eg
|
|
288 perl List_Unique_Variants_2_1_0.pl \
|
|
289 --File_Type G \
|
|
290 --Input_File /Yoruban_Trio_1100_37/GS19238-1100-37/GS00028-DNA_A01/ASM/gene-GS19238-1100-37-ASM.tsv.bz2 \
|
|
291 --Input_File /Yoruban_Trio_1100_37/GS19239-1100-37/GS00028-DNA_B01/ASM/gene-GS19239-1100-37-ASM.tsv.bz2 \
|
|
292 --Input_File /Yoruban_Trio_1100_37/GS19240-1100-37/GS00028-DNA_C01/ASM/gene-GS19240-1100-37-ASM.tsv.bz2 \
|
|
293 --Output_File /Users/rtearle/Documents/TBF/YRI_Trio_Protein_Coding.tsv \
|
|
294 --Var_Type All \
|
|
295 --Component All \
|
|
296 --Impact All \
|
|
297 --Scores_VAF yes \
|
|
298 --Scores_EAF yes \
|
|
299 --Score_Qualities yes
|
|
300
|
|
301 var fields
|
|
302 1.x locus ploidy haplotype chromosome begin end varType reference alleleSeq
|
|
303 totalScore hapLink xRef
|
|
304 2.0 locus ploidy allele chromosome begin end varType reference alleleSeq
|
|
305 varScoreVAF varScoreEAF varQuality hapLink xRef
|
|
306
|
|
307 gene fields
|
|
308 1.x index locus allele chromosome begin end varType reference call xRef geneId
|
|
309 mrnaAcc proteinAcc symbol orientation component componentIndex
|
|
310 codingRegionKnown impact nucleotidePos proteinPos annotationRefSequence
|
|
311 sampleSequence genomeRefSequence
|
|
312 2.0 index locus allele chromosome begin end varType reference call xRef geneId
|
|
313 mrnaAcc proteinAcc symbol orientation component componentIndex hasCodingRegion
|
|
314 impact nucleotidePos proteinPos annotationRefSequence sampleSequence
|
|
315 genomeRefSequence pfam
|
|
316
|
|
317 Parsing and storing input parameters
|
|
318 Only input_file fields can be repeated
|
|
319 input parameters are case insensitive
|
|
320
|
|
321
|
|
322 </help>
|
|
323 </tool>
|