Mercurial > repos > artbio > sigmut
comparison sigmut.xml @ 0:2062de974f72 draft default tip
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sigmut commit bba3eb3950b8772758cc6f19747172be7413ddd9"
author | artbio |
---|---|
date | Mon, 15 Jun 2020 00:28:49 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2062de974f72 |
---|---|
1 <tool id="SigProfiler" name="SigProfiler" version="@VERSION@"> | |
2 <description>performs mutational signature characterization from variant files</description> | |
3 | |
4 <macros> | |
5 <import>sigmut_macros.xml</import> | |
6 </macros> | |
7 <expand macro="requirements"/> | |
8 <expand macro="stdio"/> | |
9 <command detect_errors="exit_code"><![CDATA[ | |
10 @VERSION@ | |
11 @pipefail@ | |
12 BIN=`which sigprofiler | sed 's,/sigprofiler,,g'` && | |
13 echo \$BIN && | |
14 chmod -R 777 \$BIN && | |
15 mkdir run_dir && | |
16 #if str( $set_analysis.choices ) == "get_sigmut": | |
17 #if str( $set_analysis.vcfile_input.vcfile ) == "maf": | |
18 #set $infile = 'run_dir/snps.maf' | |
19 ln -s -f '$set_analysis.vcfile_input.maf_file' '$infile' && | |
20 #else if str( $set_analysis.vcfile_input.vcfile ) == "icgc": | |
21 #set $infile = 'run_dir/snps.txt' | |
22 ln -s -f '$set_analysis.vcfile_input.icgc_file' '$infile' && | |
23 #else if str( $set_analysis.vcfile_input.vcfile ) == "vcf": | |
24 #set $infile = 'run_dir/snps.vcf' | |
25 ln -s -f '$set_analysis.vcfile_input.vcf_file' '$infile' && | |
26 #end if | |
27 #end if | |
28 | |
29 sigprofiler | |
30 | |
31 #if str( $set_analysis.choices ) == "install_genome": | |
32 -ig $set_analysis.refgendwn > install.log | |
33 #else if str( $set_analysis.choices ) == "get_sigmut": | |
34 -g $set_analysis.refgendat | |
35 -f 'run_dir' | |
36 -n "project" | |
37 -p | |
38 ## ! implement exome functionality when good test available | |
39 ## #if str( $set_analysis.exome ) == "true": | |
40 ## -e | |
41 ## #end if | |
42 ## ! implement per chromosome functionality when good test available | |
43 ## #if str( $set_analysis.chrom_based ) == "true": | |
44 ## -c | |
45 ## #end if | |
46 #if str( $set_analysis.tsb_stat ) == "true": | |
47 -t | |
48 #end if | |
49 #if str( $set_analysis.gs ) == "true": | |
50 -s | |
51 #end if | |
52 ##-b $set_analysis.bed ### to be done | |
53 && pdfcombine -f -s -o blinder.pdf run_dir/output/plots/*.pdf | |
54 && ls run_dir/logs/ | |
55 #if str( $set_analysis.tsb_stat ) == "true": | |
56 && tail -n +1 run_dir/output/TSB/*.txt > transcriptional_strand_biases.txt | |
57 #end if | |
58 #if $set_analysis.seqInfo: | |
59 && tail -n +1 run_dir/output/*/*.all > information.txt | |
60 #end if | |
61 #end if | |
62 ]]></command> | |
63 | |
64 <inputs> | |
65 <conditional name="set_analysis"> | |
66 <param name="choices" type="select" label="Which of the following jobs do you want perform?"> | |
67 <option value="install_genome">Install 'de novo' a reference genome </option> | |
68 <option value="get_sigmut">Obtain the mutational signatures from VCF files</option> | |
69 </param> | |
70 <when value="install_genome"> | |
71 <param name="refgendwn" type="select" label="Reference genome" help="Get data from any of the following reference genomes:"> | |
72 <option value="GRCh37">Homo sapiens, GRCh37.p13 [GCA_000001405.14] </option> | |
73 <option value="GRCh38">Homo sapiens, GRCh38.p12 [GCA_000001405.27] </option> | |
74 <option value="mm9">Mus musculus, GRCm37 [GCA_000001635.18]</option> | |
75 <option value="mm10">Mus musculus, GRCm38.p6 [GCA_000001635.8]</option> | |
76 <option value="rn6">Rattus norvegicus, Rnor_6.0 [GCA_000001895.4]</option> | |
77 <option value="c_elegans">Caenorhabditis elegans</option> | |
78 <option value="dog">Dog</option> | |
79 </param> | |
80 </when> | |
81 | |
82 <when value="get_sigmut"> | |
83 <conditional name="vcfile_input"> | |
84 <param name="vcfile" type="select" label="VC file" help="Select the format of your input data"> | |
85 <option value="maf">Mutation Annotation Format</option> | |
86 <option value="icgc">Tab-separated file</option> | |
87 <option value="vcf">Variant Call Format</option> | |
88 </param> | |
89 <when value='maf'> | |
90 <param name="maf_file" type="data" format="maf" label="select VC file" help="Select the input file in MAF format." /> | |
91 </when> | |
92 <when value='icgc'> | |
93 <param name="icgc_file" type="data" format="txt" label="select VC file" help="Select the input file in ICGC format." /> | |
94 </when> | |
95 <when value='vcf'> | |
96 <param name="vcf_file" type="data" format="vcf" label="select VC file" help="Select the input file in VCF format." /> | |
97 </when> | |
98 </conditional> | |
99 | |
100 <param name="refgendat" type="select" label="Reference genome to be analyzed" help="Use the following reference genome:"> | |
101 <option value="GRCh37">Homo sapiens, GRCh37.p13 [GCA_000001405.14] </option> | |
102 <option value="GRCh38">Homo sapiens, GRCh38.p12 [GCA_000001405.27] </option> | |
103 <option value="mm9">Mus musculus, GRCm37 [GCA_000001635.18]</option> | |
104 <option value="mm10">Mus musculus, GRCm38.p6 [GCA_000001635.8]</option> | |
105 <option value="rn6">Rattus norvegicus, Rnor_6.0 [GCA_000001895.4]</option> | |
106 <option value="c_elegans">Caenorhabditis elegans</option> | |
107 <option value="dog">Dog</option> | |
108 </param> | |
109 | |
110 <!-- implement bed when test available --> | |
111 <!-- <conditional name="bed_input"> | |
112 <param name="bedfile" type="select" label="BED file" help="Input a BED file"> | |
113 <option value="yes">Yes</option> | |
114 <option value="no" selected="true">No</option> | |
115 </param> | |
116 <when value='yes'> | |
117 <param name="bed_file" format="bed" type="data" label="Use a BED file containing the set of regions" help="Provide a BED file"/> | |
118 </when> | |
119 <when value='no'> | |
120 </when> | |
121 </conditional> --> | |
122 <!-- implement exome functionality when test available --> | |
123 <!-- <param name="exome" type="boolean" label="Use only the exome?" checked="False" help="Use exome"/> --> | |
124 <!-- implement chrom_based functionality when test available --> | |
125 <!--<param name="chrom_based" type="boolean" label="Create the matrices on a per chromosome basis?" checked="False" help="Show snvs"/> --> | |
126 <param name="tsb_stat" type="boolean" truevalue="true" label="Performs a transcriptional strand bias test?" checked="False" help="Show snvs"/> | |
127 <param name="seqInfo" type="boolean" truevalue="true" label="Export sequence information?" checked="False" help="Show sequence information"/> | |
128 <param name="gs" type="boolean" label="Performs gene strand bias test?" checked="False" help="Show snvs"/> | |
129 </when> | |
130 </conditional> | |
131 </inputs> | |
132 | |
133 <outputs> | |
134 <data format="txt" name="logref" label="Log file: Install a Reference Genome" | |
135 from_work_dir="./install.log"> | |
136 <filter>set_analysis['choices'] == 'install_genome'</filter> | |
137 </data> | |
138 <data format="txt" name="logsmt" label="Log file: Calculate Mutational Signatures" | |
139 from_work_dir="run_dir/logs/SigProfilerMatrixGenerator*.out"> | |
140 <filter>set_analysis['choices'] == 'get_sigmut'</filter> | |
141 </data> | |
142 | |
143 <data format="pdf" name="blinder" label="SBS Mutational Signatures plots (pdf)" | |
144 from_work_dir="./blinder.pdf" > <!-- always produced for get_sigmut: the command passes -p and runs pdfcombine unconditionally --> | |
145 <filter>set_analysis['choices'] == 'get_sigmut'</filter> | |
146 </data> | |
147 | |
148 <!-- implement exome outputs when test available --> | |
149 <!-- | |
150 <data format="txt" name="dbs_exome" label="DBS_exome.vcf"> | |
151 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> | |
152 </data> | |
153 <data format="txt" name="snv_exome" label="SNV_exome.vcf"> | |
154 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> | |
155 </data> | |
156 | |
157 <data format="txt" name="sig_exome" label="DBS 78 and so on Sig. Mut. EXOME"> | |
158 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['exome'] is True</filter> | |
159 </data> | |
160 --> | |
161 <data format="txt" name="tsb" label="Transcriptional Strand Biases" | |
162 from_work_dir="./transcriptional_strand_biases.txt" > | |
163 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['tsb_stat'] is True</filter> | |
164 </data> | |
165 | |
166 <data format="txt" name="seqinfo" label="Mutational Signature detailed infos" | |
167 from_work_dir="./information.txt" > | |
168 <filter>set_analysis['choices'] == 'get_sigmut' and set_analysis['seqInfo'] is True</filter> | |
169 </data> | |
170 | |
171 </outputs> | |
172 <tests> | |
173 <test> | |
174 <param name="choices" value="install_genome"/> | |
175 <param name="refgendwn" value="GRCh38"/> | |
176 <output name="logref" file="hg38_install.log" lines_diff="5"/> | |
177 </test> | |
178 <test> | |
179 <param name="choices" value="get_sigmut"/> | |
180 <param name="refgendat" value="GRCh38"/> | |
181 <param name="vcfile" value="vcf"/> | |
182 <param name="vcf_file" ftype="vcf" value="hg38.vcf"/> | |
183 <!-- no "plot" input is declared in <inputs>; the command always passes -p, so the plots are always generated --> | |
184 <output name="logsmt" ftype="txt" file="sigmut.log" lines_diff="5" /> | |
185 <output name="blinder" file="hg38_blinder.pdf" lines_diff="5" /> | |
186 </test> | |
187 </tests> | |
188 | |
189 <help><![CDATA[ | |
190 | |
191 **SigProfiler** | |
192 | |
193 Background: | |
194 | |
195 Cancer genomes evince somatic mutations, which are imprinted by | |
196 different mutational processes, that give rise to diverse | |
197 mutational signatures. Their analysis from single base | |
198 substitutions and their immediate sequencing context, allows the | |
199 classification of small mutational events (including | |
200 substitutions, insertions, deletions, and doublet substitutions) | |
201 for better understanding the mutational processes that have | |
202 shaped a cancer genome. | |
203 | |
204 In this sense, SigProfiler constitutes a Galaxy-based wrapper of | |
205 a computational method developed by Ludmil B. Alexandrov, that | |
206 allow the exploration and visualization of mutational patterns | |
207 for all types of small mutational events. Specifically, the | |
208 following actions can be performed using SigProfiler wrapper: | |
209 | |
210 1. Identify and categorize the mutations based on possible | |
211 single nucleotide variants (SNVs), double base substitutions | |
212 (DBS), and insertions/deletions, and provide further | |
213 transcriptional strand bias categorization. Afterwards, the | |
214 classifications of these mutations are integrated into distinct | |
215 matrices. | |
216 SigProfiler provides matrix generation support for SBS-6, | |
217 SBS-96, SBS-1536, DBS-78 and DBS-1248. In addition, | |
218 mutational matrices of indels, including | |
219 ID-28 and ID-83, are generated, as well as an ID-8628 matrix that | |
220 extends the ID-83 classification. | |
221 SigProfiler examines transcriptional strand bias for single base | |
222 substitutions, doublet base substitutions, and small indels. It | |
223 is evaluated whether a mutation occurs on the transcribed or the | |
224 non-transcribed strand of well-annotated protein coding genes of | |
225 a reference genome. Mutations found in the transcribed regions | |
226 of the genome are further subclassified as: (i) transcribed, | |
227 (ii) un-transcribed, (iii) bi-directional, or (iv) unknown. | |
228 | |
229 2. Generation of plots of all types of mutational signatures as | |
230 well as all types of mutational patterns in cancer genomes. | |
231 | |
232 Additional Information: | |
233 | |
234 Classification of Single Base substitutions (SBSs): | |
235 A single base substitution (SBS) is a single DNA base-pair | |
236 substituted with another single DNA base-pair. The most | |
237 basic classification catalogues SBSs into six distinct | |
238 categories, including: C:G > A:T, C:G > G:C, C:G > T:A, | |
239 T:A > A:T, T:A > C:G, and T:A > G:C. In practice, a C:G > A:T | |
240 substitution is denoted as either a C > A mutation using the | |
241 pyrimidine base or as a G > T mutation using the purine base. | |
242 In consequence, the most commonly used SBS-6 classification of | |
243 single base substitutions can be written as: C > A, C > G, | |
244 C > T, T > A, T > C, and T > G. | |
245 Additionally, the SBS-6 classification can be further | |
246 expanded by considering the base-pairs immediately | |
247 adjacent 5′ and 3′ to the somatic mutation. Therefore, an | |
248 extended classification for analysis of mutational signatures is | |
249 SBS-96, where each of the classes in SBS-6 is further elaborated | |
250 using one base adjacent at the 5′ of the mutation and one base | |
251 adjacent at the 3′ of the mutation. | |
252 Logically, SBS-96 can be further elaborated by including | |
253 additional 5′ and 3′ adjacent context. Each of the six single | |
254 base substitutions in SBS-6 has 256 possible pentanucleotides | |
255 resulting in a classification with 1536 possible channels. | |
256 | |
257 Classification of Doublet Base substitutions (DBSs): | |
258 Doublet base substitutions (DBSs) are somatic mutations in which | |
259 a set of two adjacent DNA base-pairs is simultaneously | |
260 substituted with another set of two adjacent DNA base-pairs. An | |
261 example of a DBS is a set of CT:GA base-pairs mutating to a set | |
262 of AA:TT base-pairs, which is usually denoted as CT:GA > AA:TT. | |
263 It should be noted that a CT:GA > AA:TT mutation can be | |
264 equivalently written as either a CT > AA or an AG > TT mutation. Overall, the | |
265 basic classification catalogues DBSs into 78 distinct categories | |
266 denoted as the DBS-78 matrix. | |
267 Similarly, we can expand the characterization of DBS mutations | |
268 by considering the 5′ and 3′ adjacent contexts. With | |
269 seventy-eight possible DBS mutations having sixteen possible | |
270 tetranucleotides each, this context expansion results in 1248 | |
271 possible channels denoted as the DBS-1248 context. | |
272 | |
273 Classification of small insertions and deletions (IDs): | |
274 A somatic insertion is the incorporation of a set of base-pairs | |
275 that lengthens a chromosome, while a somatic deletion is the | |
276 removal of a set of existing base-pairs from a given location | |
277 of a chromosome. | |
278 Unfortunately, indel classification cannot be performed | |
279 analogously to SBS or DBS classifications, where the immediate | |
280 sequencing context flanking each mutation was | |
281 utilized to subclassify these mutational events. | |
282 Consequently, indels (IDs) are classified as single base-pair | |
283 or longer events. They can be further subclassified as either a | |
284 C:G or a T:A indel, while longer indels can also be | |
285 subclassified based on their lengths: 2 bp, 3 bp, 4 bp, and | |
286 5 + bp. | |
287 | |
288 Incorporation of transcription Strand Bias (TSB): | |
289 The mutational classifications described above allow the | |
290 characterization of mutational patterns of single base | |
291 substitutions, doublet base substitutions, and small insertions | |
292 and deletions. Nevertheless, these classifications can be | |
293 further elaborated by incorporating strand bias. Mutations | |
294 of the same type are expected to be equally distributed across the two | |
295 DNA strands. However, in many cases an asymmetric number of mutations is | |
296 observed due to either one of the strands being preferentially | |
297 repaired or one of the strands having a higher propensity for | |
298 being damaged. To sub-classify mutations based on their | |
299 transcriptional strand bias, the pyrimidine orientation with | |
300 respect to the locations of well-annotated protein coding genes | |
301 on a genome is considered. | |
302 | |
303 Running SigProfiler: | |
304 | |
305 1. Reference Genomes: | |
306 Before using SigProfiler, the installation of a reference genome | |
307 is required. By default, the tool supports the following | |
308 reference genomes: | |
309 | |
310 Human: GRCh37 & GRCh38 | |
311 | |
312 Mouse: mm9 & mm10 | |
313 | |
314 Rat: rn6 | |
315 | |
316 Nematode: c_elegans | |
317 | |
318 A valid command line looks like: | |
319 | |
320 sigprofiler -ig GRCh37 | |
321 | |
322 2. Mutational signatures calculation: | |
323 | |
324 After successful installation of a reference genome, SigProfiler | |
325 can be applied to files containing somatic mutations in multiple | |
326 formats, for transforming these mutational catalogues into mutational | |
327 matrices. Specifically, the tool can read data formats such as | |
328 Variant Calling Format (VCF) and Mutation Annotation Format | |
329 (MAF) and the following parameters should be provided for | |
330 generating the diverse matrices and plots: | |
331 | |
332 --name | -n = Project name | |
333 --genome | -g = Reference Genome | |
334 --files | -f = Absolute path where the input mutation files are located | |
335 | |
336 A valid command line looks like: | |
337 | |
338 sigprofiler -n MYPROJECT -g GRCh37 -f /path_to_folder_with_VCF_files/ -p | |
339 | |
340 **Options** | |
341 --version show program's version number and exit | |
342 | |
343 -h, --help show this help message and exit | |
344 | |
345 --install_genome Install de novo any of the following reference | |
346 genomes: 'GRCh37', 'GRCh38', 'mm9' or 'mm10'. | |
347 | |
348 --name=APPENDIX Provide a project name | |
349 | |
350 --genome=NAME Provide a reference genome (ex: GRCh37, GRCh38, | |
351 mm9 or mm10). | |
352 | |
353 --files=Abs_path Path where the input vcf files are located | |
354 | |
355 --exome Use only the exome or not | |
356 | |
357 --bed=FILE BED file containing the set of regions to be used | |
358 in generating the matrices | |
359 | |
360 --chrom Create the matrices on a per chromosome basis | |
361 | |
362 --plot Generate the plots for each context | |
363 | |
364 --tsb Performs a transcriptional strand bias test for the | |
365 24, 384, and 6144 contexts | |
366 | |
367 --gs Performs a gene strand bias test | |
368 | |
369 For further info see: https://github.com/AlexandrovLab/SigProfilerMatrixGenerator | |
370 | |
371 ]]></help> | |
372 | |
373 <citations> | |
374 <citation type="doi">10.1186/s12864-019-6041-2</citation> | |
375 </citations> | |
376 | |
377 </tool> |