Mercurial > repos > matthias > stacks2_denovomap
comparison stacks_denovomap.xml @ 0:e5e5617c064a draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit 98327d2948ae1ccb5aef5db9ab88605fd74a0de7-dirty
author | matthias |
---|---|
date | Thu, 29 Nov 2018 11:50:28 -0500 |
parents | |
children | 58b5c841fa51 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e5e5617c064a |
---|---|
1 <tool id="stacks2_denovomap" name="Stacks2: de novo map" version="@WRAPPER_VERSION@"> | |
2 <description>the Stacks pipeline without a reference genome (denovo_map.pl)</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements"/> | |
7 <expand macro="stdio"/> | |
8 <command><![CDATA[ | |
9 @CLEAN_EXT@ | |
10 mkdir stacks_inputs stacks_outputs && | |
11 | |
12 ## link input reads (done by the FASTQ_INPUT macro token, which is defined outside this file) | |
13 ## - for paired lists NAME.1.EXT and NAME.2.EXT files are created | |
14 ## - otherwise NAME.EXT | |
15 #for $sample in $input_type.samples | |
16 #if $sample.is_collection: | |
17 #set $read_direction="forward" | |
18 @FASTQ_INPUT@ | |
19 #set $read_direction="reverse" | |
20 @FASTQ_INPUT@ | |
21 #else | |
22 #set $read_direction=None | |
23 @FASTQ_INPUT@ | |
24 #end if | |
25 #end for | |
26 | |
27 denovo_map.pl | |
28 --samples stacks_inputs | |
29 #if str($popmap) != 'None': | |
30 --popmap '$popmap' | |
31 #end if | |
32 -o stacks_outputs | |
33 -T \${GALAXY_SLOTS:-1} | |
34 | |
35 -M $assembly_options.M | |
36 -n $assembly_options.n | |
37 --var-alpha $model_options.var_alpha | |
38 --gt-alpha $model_options.gt_alpha | |
39 $paired | |
40 | |
41 | |
42 | |
43 ## the catalog.calls output is a gzip-ed VCF; extract it | |
44 ## to make it usable in Galaxy (with the downside that we | |
45 ## need to gzip it again for downstream calls like populations) | |
46 && gunzip -c stacks_outputs/catalog.calls > stacks_outputs/catalog.calls.vcf | |
47 && mv stacks_outputs/denovo_map.log '$output_log' | |
48 ]]></command> | |
49 | |
50 <inputs> | |
51 <expand macro="fastq_input_macro"/> <!-- defined in macros.xml (not visible here); presumably provides the input_type conditional whose samples are iterated in the command, confirm --> | |
52 <param argument="--popmap" type="data" format="tabular,txt" label="Population map" /> <!-- handed to denovo_map.pl as the population map; see the help section for an example of the format --> | |
53 <section name="assembly_options" title="Assembly options" expanded="true"> <!-- M and n are passed to denovo_map.pl as the -M and -n options --> | |
54 <param name="M" argument="-M" type="integer" value="2" label="Number of mismatches allowed between loci when processing a single individual" help="used in ustacks"/> | |
55 <param name="n" argument="-n" type="integer" value="1" label="Number of mismatches allowed between loci when building the catalog" help="used in cstacks; suggested: set to -M"/> | |
56 </section> | |
57 <section name="model_options" title="Variant calling options (for gstacks)" expanded="true"> | |
58 <expand macro="variant_calling_options_vg"/> <!-- defined in macros.xml; presumably declares var_alpha and gt_alpha, which the command reads from this section, confirm --> | |
59 </section> | |
60 <param argument="--paired" type="boolean" checked="false" truevalue="--paired" falsevalue="" label="Use paired-end reads to assemble mini-contigs" help="After assembling RAD loci (in the tsv2bam step)" /> <!-- the selected truevalue or falsevalue is inserted verbatim into the command line --> | |
61 <expand macro="in_log"/> <!-- defined in macros.xml; NOTE(review): presumably a switch for the log output, confirm --> | |
62 </inputs> | |
63 <outputs> <!-- every output is declared by a macro from macros.xml (not visible here); tooladd names the pipeline step and is presumably used in the output labels, confirm --> | |
64 <expand macro="out_log"/> <!-- presumably declares output_log, which receives denovo_map.log in the command; the pipeline also writes tsv2bam.log, gstacks.log and populations.log, so this could become a collection --> | |
65 <expand macro="ustacks_outputs_macro" tooladd="(ustacks)"/> | |
66 <expand macro="cstacks_outputs_macro" tooladd="(cstacks)"/> | |
67 <expand macro="sstacks_outputs_macro" tooladd="(sstacks)"/> | |
68 <expand macro="tsv2bam_outputs_macro" tooladd="(tsv2bam)"/> | |
69 <expand macro="gstacks_outputs_macro" tooladd="(gstacks)"/> | |
70 <expand macro="populations_output_light" tooladd="(populations)"/> | |
71 </outputs> | |
72 | |
73 <tests> | |
74 <!-- Single-end (SE) input given as a multi-selection of datasets, default parameters. Outputs are compared with the reference outputs of the individual pipeline components (ustacks, cstacks, sstacks, tsv2bam, gstacks and populations test-data directories). Tabular collection elements tolerate two differing lines (lines_diff); BAM and fa.gz elements are compared by size only (sim_size). NOTE(review): the reference for catalog.calls.vcf is named catalog.calls.tsv although it is compared as vcf; confirm the test-data file name. --> | |
75 <test> | |
76 <param name="input_type|input_type_selector" value="manual"/> | |
77 <param name="input_type|samples" value="demultiplexed/PopA_01.1.fq,demultiplexed/PopA_02.1.fq" ftype="fastqsanger" /> | |
78 <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" /> | |
79 <output_collection name="tabs" count="6"> | |
80 <element name="PopA_01.tags" file="ustacks/PopA_01.tags.tsv" ftype="tabular" lines_diff="2"/> | |
81 <element name="PopA_01.snps" file="ustacks/PopA_01.snps.tsv" ftype="tabular" lines_diff="2"/> | |
82 <element name="PopA_01.alleles" file="ustacks/PopA_01.alleles.tsv" ftype="tabular" lines_diff="2"/> | |
83 <element name="PopA_02.tags" file="ustacks/PopA_02.tags.tsv" ftype="tabular" lines_diff="2"/> | |
84 <element name="PopA_02.snps" file="ustacks/PopA_02.snps.tsv" ftype="tabular" lines_diff="2"/> | |
85 <element name="PopA_02.alleles" file="ustacks/PopA_02.alleles.tsv" ftype="tabular" lines_diff="2"/> | |
86 </output_collection> | |
87 <output_collection name="catalog" type="list" count="3"> | |
88 <element name="catalog.alleles" file="cstacks/catalog.alleles.tsv" ftype="tabular" lines_diff="2"/> | |
89 <element name="catalog.snps" file="cstacks/catalog.snps.tsv" ftype="tabular" lines_diff="2" /> | |
90 <element name="catalog.tags" file="cstacks/catalog.tags.tsv" ftype="tabular" lines_diff="2" /> | |
91 </output_collection> | |
92 <output_collection name="matches" type="list" count="2"> | |
93 <element name="PopA_01.matches" file="sstacks/PopA_01.matches.tsv" ftype="tabular" lines_diff="2"/> | |
94 <element name="PopA_02.matches" file="sstacks/PopA_02.matches.tsv" ftype="tabular" lines_diff="2"/> | |
95 </output_collection> | |
96 <output_collection name="bams" type="list" count="2"> | |
97 <element name="PopA_01.matches" file="tsv2bam/PopA_01.matches.bam" ftype="bam" compare="sim_size"/> | |
98 <element name="PopA_02.matches" file="tsv2bam/PopA_02.matches.bam" ftype="bam" compare="sim_size" /> | |
99 </output_collection> | |
100 <output_collection name="gstacks_out" type="list" count="2"> | |
101 <element name="catalog.calls.vcf" file="gstacks/catalog.calls.tsv" ftype="vcf" lines_diff="2"/> | |
102 <element name="catalog.fa.gz" file="gstacks/catalog.fa.gz" ftype="fasta.gz" compare="sim_size"/> | |
103 </output_collection> | |
104 <output ftype="tabular" name="out_haplotypes" value="populations/populations.haplotypes.tsv"/> | |
105 <output ftype="tabular" name="out_hapstats" value="populations/populations.hapstats.tsv"/> | |
106 <output ftype="txt" name="out_populations_log_distribs" value="populations/populations.log.distribs"/> | |
107 <output ftype="tabular" name="out_sumstats_sum" value="populations/populations.sumstats_summary.tsv"/> | |
108 <output ftype="tabular" name="out_sumstats" value="populations/populations.sumstats.tsv"/> | |
109 <output ftype="tabular" name="out_sql" value="populations/populations.markers.tsv"/> | |
110 </test> | |
111 <!-- Single-end (SE) input given as a list collection with non-default values for M, n, var_alpha and gt_alpha. The test asserts that these values appear on the generated command line and otherwise checks only the number of elements in each output collection. --> | |
112 <test> | |
113 <param name="input_type|input_type_selector" value="list"/> | |
114 <param name="input_type|samples"> | |
115 <collection type="list"> | |
116 <element name="PopA_01" value="demultiplexed/PopA_01.1.fq" ftype="fastqsanger" /> | |
117 <element name="PopA_02" value="demultiplexed/PopA_02.1.fq" ftype="fastqsanger"/> | |
118 </collection> | |
119 </param> | |
120 <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" /> | |
121 <param name="assembly_options|M" value="3" /> | |
122 <param name="assembly_options|n" value="3" /> | |
123 <param name="model_options|var_alpha" value="0.1" /> | |
124 <param name="model_options|gt_alpha" value="0.1" /> | |
125 <assert_command> | |
126 <has_text text="-M 3" /> | |
127 <has_text text="-n 3" /> | |
128 <has_text text="--var-alpha 0.1" /> | |
129 <has_text text="--gt-alpha 0.1" /> | |
130 </assert_command> | |
131 <output_collection name="tabs" count="6"/> | |
132 <output_collection name="catalog" type="list" count="3"/> | |
133 <output_collection name="matches" type="list" count="2"/> | |
134 <output_collection name="bams" type="list" count="2"/> | |
135 <output_collection name="gstacks_out" type="list" count="2"/> | |
136 </test> | |
137 <!-- Paired input (list:paired collection) with default parameters, i.e. the paired option is off and the reverse reads are not used. Outputs are compared with the reference outputs of the individual pipeline components; the expected files are the same as in the SE test above because the reverse reads are not used. --> | |
138 <test> | |
139 <param name="input_type|input_type_selector" value="list"/> | |
140 <param name="input_type|samples"> | |
141 <collection type="list:paired"> | |
142 <element name="PopA_01"> | |
143 <collection type="paired"> | |
144 <element name="forward" value="demultiplexed/PopA_01.1.fq" ftype="fastqsanger" /> | |
145 <element name="reverse" value="demultiplexed/PopA_01.2.fq" ftype="fastqsanger"/> | |
146 </collection> | |
147 </element> | |
148 <element name="PopA_02"> | |
149 <collection type="paired"> | |
150 <element name="forward" value="demultiplexed/PopA_02.1.fq" ftype="fastqsanger" /> | |
151 <element name="reverse" value="demultiplexed/PopA_02.2.fq" ftype="fastqsanger"/> | |
152 </collection> | |
153 </element> | |
154 </collection> | |
155 </param> | |
156 <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" /> | |
157 <output_collection name="tabs" count="6"> | |
158 <element name="PopA_01.tags" file="ustacks/PopA_01.tags.tsv" ftype="tabular" lines_diff="2"/> | |
159 <element name="PopA_01.snps" file="ustacks/PopA_01.snps.tsv" ftype="tabular" lines_diff="2"/> | |
160 <element name="PopA_01.alleles" file="ustacks/PopA_01.alleles.tsv" ftype="tabular" lines_diff="2"/> | |
161 <element name="PopA_02.tags" file="ustacks/PopA_02.tags.tsv" ftype="tabular" lines_diff="2"/> | |
162 <element name="PopA_02.snps" file="ustacks/PopA_02.snps.tsv" ftype="tabular" lines_diff="2"/> | |
163 <element name="PopA_02.alleles" file="ustacks/PopA_02.alleles.tsv" ftype="tabular" lines_diff="2"/> | |
164 </output_collection> | |
165 <output_collection name="catalog" type="list" count="3"> | |
166 <element name="catalog.alleles" file="cstacks/catalog.alleles.tsv" ftype="tabular" lines_diff="2"/> | |
167 <element name="catalog.snps" file="cstacks/catalog.snps.tsv" ftype="tabular" lines_diff="2" /> | |
168 <element name="catalog.tags" file="cstacks/catalog.tags.tsv" ftype="tabular" lines_diff="2" /> | |
169 </output_collection> | |
170 <output_collection name="matches" type="list" count="2"> | |
171 <element name="PopA_01.matches" file="sstacks/PopA_01.matches.tsv" ftype="tabular" lines_diff="2"/> | |
172 <element name="PopA_02.matches" file="sstacks/PopA_02.matches.tsv" ftype="tabular" lines_diff="2"/> | |
173 </output_collection> | |
174 <output_collection name="bams" type="list" count="2"> | |
175 <element name="PopA_01.matches" file="tsv2bam/PopA_01.matches.bam" ftype="bam" compare="sim_size"/> | |
176 <element name="PopA_02.matches" file="tsv2bam/PopA_02.matches.bam" ftype="bam" compare="sim_size" /> | |
177 </output_collection> | |
178 <output_collection name="gstacks_out" type="list" count="2"> | |
179 <element name="catalog.calls.vcf" file="gstacks/catalog.calls.tsv" ftype="vcf" lines_diff="2"/> | |
180 <element name="catalog.fa.gz" file="gstacks/catalog.fa.gz" ftype="fasta.gz" compare="sim_size"/> | |
181 </output_collection> | |
182 <output ftype="tabular" name="out_haplotypes" value="populations/populations.haplotypes.tsv"/> | |
183 <output ftype="tabular" name="out_hapstats" value="populations/populations.hapstats.tsv"/> | |
184 <output ftype="txt" name="out_populations_log_distribs" value="populations/populations.log.distribs"/> | |
185 <output ftype="tabular" name="out_sumstats_sum" value="populations/populations.sumstats_summary.tsv"/> | |
186 <output ftype="tabular" name="out_sumstats" value="populations/populations.sumstats.tsv"/> | |
187 <output ftype="tabular" name="out_sql" value="populations/populations.markers.tsv"/> | |
188 </test> | |
189 <!-- Paired input (list:paired collection) with the paired option switched on, so the reverse reads are used as well. Only the number of elements in each output collection is checked. NOTE(review): no assert_command verifies that the option actually reaches the command line; consider adding one. --> | |
190 <test> | |
191 <param name="input_type|input_type_selector" value="list"/> | |
192 <param name="input_type|samples"> | |
193 <collection type="list:paired"> | |
194 <element name="PopA_01"> | |
195 <collection type="paired"> | |
196 <element name="forward" value="demultiplexed/PopA_01.1.fq" ftype="fastqsanger" /> | |
197 <element name="reverse" value="demultiplexed/PopA_01.2.fq" ftype="fastqsanger"/> | |
198 </collection> | |
199 </element> | |
200 <element name="PopA_02"> | |
201 <collection type="paired"> | |
202 <element name="forward" value="demultiplexed/PopA_02.1.fq" ftype="fastqsanger" /> | |
203 <element name="reverse" value="demultiplexed/PopA_02.2.fq" ftype="fastqsanger"/> | |
204 </collection> | |
205 </element> | |
206 </collection> | |
207 </param> | |
208 <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" /> | |
209 <param name="paired" value="--paired" /> | |
210 <output_collection name="tabs" count="6"/> | |
211 <output_collection name="catalog" type="list" count="3"/> | |
212 <output_collection name="matches" type="list" count="2"/> | |
213 <output_collection name="bams" type="list" count="2"/> | |
214 <output_collection name="gstacks_out" type="list" count="2"/> | |
215 </test> | |
216 </tests> | |
217 | |
218 <help> | |
219 <![CDATA[ | |
220 .. class:: infomark | |
221 | |
222 **What it does** | |
223 | |
224 This program runs each of the Stacks components in turn: first, ustacks is run on each of the specified samples, building loci and calling SNPs in each. Second, cstacks is run to create a catalog of loci from the samples listed in the population map, and third, sstacks is executed to match each sample against the catalog. Next, tsv2bam transposes the data so that they are organized by locus instead of by sample (optionally pulling in the paired-end reads), gstacks calls variants and genotypes the samples at each locus, and finally populations computes population-level summary statistics. A bit more detail on this process can be found in the FAQ. | |
225 | |
226 -------- | |
227 | |
228 **Input files** | |
229 | |
230 FASTQ, FASTA | |
231 | |
232 - Population map:: | |
233 | |
234 indv_01 1 | |
235 indv_02 1 | |
236 indv_03 1 | |
237 indv_04 2 | |
238 indv_05 2 | |
239 indv_06 2 | |
240 | |
241 | |
242 **Output files** | |
243 | |
244 | |
245 - XXX.tags.tsv file: | |
246 | |
247 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_ | |
248 | |
249 Notes: For the tags file, each stack will start in the file with a consensus sequence for the entire stack followed by the flags for that stack. Then, each individual read that was merged into that stack will follow. The next stack will start with another consensus sequence. | |
250 | |
251 | |
252 - XXX.snps.tsv file: | |
253 | |
254 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_ | |
255 | |
256 Notes: If a stack has two SNPs called within it, then there will be two lines in this file listing each one. | |
257 | |
258 | |
259 - XXX.alleles.tsv file: | |
260 | |
261 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_ | |
262 | |
263 | |
264 - XXX.matches.tsv file: | |
265 | |
266 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_ | |
267 | |
268 Notes: Each line in this file records a match between a catalog locus and a locus in an individual, for a particular haplotype. The Batch ID plus the Catalog ID together represent a unique locus in the entire population, while the Sample ID and the Stack ID together represent a unique locus in an individual sample. | |
269 | |
270 | |
271 - other files: | |
272 | |
273 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_ | |
274 | |
275 @STACKS_INFOS@ | |
276 ]]> | |
277 </help> | |
278 <expand macro="citation" /> | |
279 </tool> |