comparison stacks_denovomap.xml @ 0:e5e5617c064a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit 98327d2948ae1ccb5aef5db9ab88605fd74a0de7-dirty
author matthias
date Thu, 29 Nov 2018 11:50:28 -0500
parents
children 58b5c841fa51
comparison
equal deleted inserted replaced
-1:000000000000 0:e5e5617c064a
1 <tool id="stacks2_denovomap" name="Stacks2: de novo map" version="@WRAPPER_VERSION@">
2 <description>the Stacks pipeline without a reference genome (denovo_map.pl)</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <expand macro="stdio"/>
8 <command><![CDATA[
9 @CLEAN_EXT@
10 mkdir stacks_inputs stacks_outputs &&
11
12 ## link input reads
13 ## - for paired lists NAME.1.EXT and NAME.2.EXT files are created
14 ## - otherwise NAME.EXT
15 #for $sample in $input_type.samples
16 #if $sample.is_collection:
17 #set $read_direction="forward"
18 @FASTQ_INPUT@
19 #set $read_direction="reverse"
20 @FASTQ_INPUT@
21 #else
22 #set $read_direction=None
23 @FASTQ_INPUT@
24 #end if
25 #end for
26 ## run the pipeline on the linked reads, writing everything to stacks_outputs
27 denovo_map.pl
28 --samples stacks_inputs
29 #if str($popmap) != 'None':
30 --popmap '$popmap'
31 #end if
32 -o stacks_outputs
33 -T \${GALAXY_SLOTS:-1}
34 ## assembly (-M, -n) and variant calling (alpha thresholds) options
35 -M $assembly_options.M
36 -n $assembly_options.n
37 --var-alpha $model_options.var_alpha
38 --gt-alpha $model_options.gt_alpha
39 $paired
40
41
42 ## post-processing of the pipeline results
43 ## the catalog.calls output is a gzip-ed vcf extract it
44 ## to make it usable in Galaxy (with the downside that we
45 ## need to gzip it again for downstream calls like populations)
46 && gunzip -c stacks_outputs/catalog.calls > stacks_outputs/catalog.calls.vcf
47 ## the output path is quoted like the other dataset paths in this command
48 && mv stacks_outputs/denovo_map.log '$output_log'
49 ]]></command>
49
50 <inputs>
51 <expand macro="fastq_input_macro"/> <!-- read inputs; presumably defines input_type and its samples used by the command (confirm in macros.xml) -->
52 <param argument="--popmap" type="data" format="tabular,txt" label="Population map" /> <!-- passed to denovo_map.pl as its popmap option -->
53 <section name="assembly_options" title="Assembly options" expanded="true"> <!-- both values are passed to denovo_map.pl as -M and -n -->
54 <param name="M" argument="-M" type="integer" value="2" label="Number of mismatches allowed between loci when processing a single individual" help="used in ustacks"/>
55 <param name="n" argument="-n" type="integer" value="1" label="Number of mismatches allowed between loci when building the catalog" help="used in cstacks; suggested: set to -M"/>
56 </section>
57 <section name="model_options" title="Variant calling options (for gstacks)" expanded="true"> <!-- var_alpha and gt_alpha read by the command presumably come from the expanded macro (confirm in macros.xml) -->
58 <expand macro="variant_calling_options_vg"/>
59 </section>
60 <param argument="--paired" type="boolean" checked="false" truevalue="--paired" falsevalue="" label="Use paired-end reads to assemble mini-contigs" help="After assembling RAD loci (in the tsv2bam step)" /> <!-- inserts the paired flag into the command when checked, nothing otherwise -->
61 <expand macro="in_log"/> <!-- presumably the switch controlling the log output (confirm in macros.xml) -->
62 </inputs>
63 <outputs>
64 <expand macro="out_log"/> <!-- pipeline also writes tsv2bam.log, gstacks.log, populations.log .. could be a collection -->
65 <expand macro="ustacks_outputs_macro" tooladd="(ustacks)"/> <!-- one outputs macro per pipeline component; tooladd carries the component name (presumably shown in the output labels; confirm in macros.xml) -->
66 <expand macro="cstacks_outputs_macro" tooladd="(cstacks)"/>
67 <expand macro="sstacks_outputs_macro" tooladd="(sstacks)"/>
68 <expand macro="tsv2bam_outputs_macro" tooladd="(tsv2bam)"/>
69 <expand macro="gstacks_outputs_macro" tooladd="(gstacks)"/>
70 <expand macro="populations_output_light" tooladd="(populations)"/>
71 </outputs>
72
73 <tests>
74 <!-- Single-end (SE) input given as a multi-selection of datasets, default parameters; outputs are compared against the expected files of the individual pipeline components -->
75 <test>
76 <param name="input_type|input_type_selector" value="manual"/>
77 <param name="input_type|samples" value="demultiplexed/PopA_01.1.fq,demultiplexed/PopA_02.1.fq" ftype="fastqsanger" />
78 <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" />
79 <output_collection name="tabs" count="6"> <!-- tags, snps and alleles for each of the two samples -->
80 <element name="PopA_01.tags" file="ustacks/PopA_01.tags.tsv" ftype="tabular" lines_diff="2"/>
81 <element name="PopA_01.snps" file="ustacks/PopA_01.snps.tsv" ftype="tabular" lines_diff="2"/>
82 <element name="PopA_01.alleles" file="ustacks/PopA_01.alleles.tsv" ftype="tabular" lines_diff="2"/>
83 <element name="PopA_02.tags" file="ustacks/PopA_02.tags.tsv" ftype="tabular" lines_diff="2"/>
84 <element name="PopA_02.snps" file="ustacks/PopA_02.snps.tsv" ftype="tabular" lines_diff="2"/>
85 <element name="PopA_02.alleles" file="ustacks/PopA_02.alleles.tsv" ftype="tabular" lines_diff="2"/>
86 </output_collection>
87 <output_collection name="catalog" type="list" count="3">
88 <element name="catalog.alleles" file="cstacks/catalog.alleles.tsv" ftype="tabular" lines_diff="2"/>
89 <element name="catalog.snps" file="cstacks/catalog.snps.tsv" ftype="tabular" lines_diff="2" />
90 <element name="catalog.tags" file="cstacks/catalog.tags.tsv" ftype="tabular" lines_diff="2" />
91 </output_collection>
92 <output_collection name="matches" type="list" count="2">
93 <element name="PopA_01.matches" file="sstacks/PopA_01.matches.tsv" ftype="tabular" lines_diff="2"/>
94 <element name="PopA_02.matches" file="sstacks/PopA_02.matches.tsv" ftype="tabular" lines_diff="2"/>
95 </output_collection>
96 <output_collection name="bams" type="list" count="2"> <!-- BAM files are compared with sim_size instead of a line diff -->
97 <element name="PopA_01.matches" file="tsv2bam/PopA_01.matches.bam" ftype="bam" compare="sim_size"/>
98 <element name="PopA_02.matches" file="tsv2bam/PopA_02.matches.bam" ftype="bam" compare="sim_size" />
99 </output_collection>
100 <output_collection name="gstacks_out" type="list" count="2">
101 <element name="catalog.calls.vcf" file="gstacks/catalog.calls.tsv" ftype="vcf" lines_diff="2"/>
102 <element name="catalog.fa.gz" file="gstacks/catalog.fa.gz" ftype="fasta.gz" compare="sim_size"/>
103 </output_collection>
104 <output ftype="tabular" name="out_haplotypes" value="populations/populations.haplotypes.tsv"/>
105 <output ftype="tabular" name="out_hapstats" value="populations/populations.hapstats.tsv"/>
106 <output ftype="txt" name="out_populations_log_distribs" value="populations/populations.log.distribs"/>
107 <output ftype="tabular" name="out_sumstats_sum" value="populations/populations.sumstats_summary.tsv"/>
108 <output ftype="tabular" name="out_sumstats" value="populations/populations.sumstats.tsv"/>
109 <output ftype="tabular" name="out_sql" value="populations/populations.markers.tsv"/>
110 </test>
111 <!-- Single-end (SE) input given as a list collection, non-default parameters; checks the generated command line and only the sizes of the output collections -->
112 <test>
113 <param name="input_type|input_type_selector" value="list"/>
114 <param name="input_type|samples">
115 <collection type="list">
116 <element name="PopA_01" value="demultiplexed/PopA_01.1.fq" ftype="fastqsanger" />
117 <element name="PopA_02" value="demultiplexed/PopA_02.1.fq" ftype="fastqsanger"/>
118 </collection>
119 </param>
120 <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" />
121 <param name="assembly_options|M" value="3" />
122 <param name="assembly_options|n" value="3" />
123 <param name="model_options|var_alpha" value="0.1" />
124 <param name="model_options|gt_alpha" value="0.1" />
125 <assert_command> <!-- verifies that the non-default values set above reach the command line -->
126 <has_text text="-M 3" />
127 <has_text text="-n 3" />
128 <has_text text="--var-alpha 0.1" />
129 <has_text text="--gt-alpha 0.1" />
130 </assert_command>
131 <output_collection name="tabs" count="6"/>
132 <output_collection name="catalog" type="list" count="3"/>
133 <output_collection name="matches" type="list" count="2"/>
134 <output_collection name="bams" type="list" count="2"/>
135 <output_collection name="gstacks_out" type="list" count="2"/>
136 </test>
137 <!-- paired input, defaults (not using reverse reads), testing against the output of the pipeline components (note: the expected files are the same as for single-end input because the reverse reads are not used) -->
138 <test>
139 <param name="input_type|input_type_selector" value="list"/>
140 <param name="input_type|samples">
141 <collection type="list:paired">
142 <element name="PopA_01">
143 <collection type="paired">
144 <element name="forward" value="demultiplexed/PopA_01.1.fq" ftype="fastqsanger" />
145 <element name="reverse" value="demultiplexed/PopA_01.2.fq" ftype="fastqsanger"/>
146 </collection>
147 </element>
148 <element name="PopA_02">
149 <collection type="paired">
150 <element name="forward" value="demultiplexed/PopA_02.1.fq" ftype="fastqsanger" />
151 <element name="reverse" value="demultiplexed/PopA_02.2.fq" ftype="fastqsanger"/>
152 </collection>
153 </element>
154 </collection>
155 </param>
156 <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" />
157 <output_collection name="tabs" count="6">
158 <element name="PopA_01.tags" file="ustacks/PopA_01.tags.tsv" ftype="tabular" lines_diff="2"/>
159 <element name="PopA_01.snps" file="ustacks/PopA_01.snps.tsv" ftype="tabular" lines_diff="2"/>
160 <element name="PopA_01.alleles" file="ustacks/PopA_01.alleles.tsv" ftype="tabular" lines_diff="2"/>
161 <element name="PopA_02.tags" file="ustacks/PopA_02.tags.tsv" ftype="tabular" lines_diff="2"/>
162 <element name="PopA_02.snps" file="ustacks/PopA_02.snps.tsv" ftype="tabular" lines_diff="2"/>
163 <element name="PopA_02.alleles" file="ustacks/PopA_02.alleles.tsv" ftype="tabular" lines_diff="2"/>
164 </output_collection>
165 <output_collection name="catalog" type="list" count="3">
166 <element name="catalog.alleles" file="cstacks/catalog.alleles.tsv" ftype="tabular" lines_diff="2"/>
167 <element name="catalog.snps" file="cstacks/catalog.snps.tsv" ftype="tabular" lines_diff="2" />
168 <element name="catalog.tags" file="cstacks/catalog.tags.tsv" ftype="tabular" lines_diff="2" />
169 </output_collection>
170 <output_collection name="matches" type="list" count="2">
171 <element name="PopA_01.matches" file="sstacks/PopA_01.matches.tsv" ftype="tabular" lines_diff="2"/>
172 <element name="PopA_02.matches" file="sstacks/PopA_02.matches.tsv" ftype="tabular" lines_diff="2"/>
173 </output_collection>
174 <output_collection name="bams" type="list" count="2">
175 <element name="PopA_01.matches" file="tsv2bam/PopA_01.matches.bam" ftype="bam" compare="sim_size"/>
176 <element name="PopA_02.matches" file="tsv2bam/PopA_02.matches.bam" ftype="bam" compare="sim_size" />
177 </output_collection>
178 <output_collection name="gstacks_out" type="list" count="2">
179 <element name="catalog.calls.vcf" file="gstacks/catalog.calls.tsv" ftype="vcf" lines_diff="2"/>
180 <element name="catalog.fa.gz" file="gstacks/catalog.fa.gz" ftype="fasta.gz" compare="sim_size"/>
181 </output_collection>
182 <output ftype="tabular" name="out_haplotypes" value="populations/populations.haplotypes.tsv"/>
183 <output ftype="tabular" name="out_hapstats" value="populations/populations.hapstats.tsv"/>
184 <output ftype="txt" name="out_populations_log_distribs" value="populations/populations.log.distribs"/>
185 <output ftype="tabular" name="out_sumstats_sum" value="populations/populations.sumstats_summary.tsv"/>
186 <output ftype="tabular" name="out_sumstats" value="populations/populations.sumstats.tsv"/>
187 <output ftype="tabular" name="out_sql" value="populations/populations.markers.tsv"/>
188 </test>
189 <!-- Paired input with the paired option enabled (reverse reads are used); checks only the sizes of the output collections -->
190 <test>
191 <param name="input_type|input_type_selector" value="list"/>
192 <param name="input_type|samples">
193 <collection type="list:paired">
194 <element name="PopA_01">
195 <collection type="paired">
196 <element name="forward" value="demultiplexed/PopA_01.1.fq" ftype="fastqsanger" />
197 <element name="reverse" value="demultiplexed/PopA_01.2.fq" ftype="fastqsanger"/>
198 </collection>
199 </element>
200 <element name="PopA_02">
201 <collection type="paired">
202 <element name="forward" value="demultiplexed/PopA_02.1.fq" ftype="fastqsanger" />
203 <element name="reverse" value="demultiplexed/PopA_02.2.fq" ftype="fastqsanger"/>
204 </collection>
205 </element>
206 </collection>
207 </param>
208 <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" />
209 <param name="paired" value="--paired" />
210 <output_collection name="tabs" count="6"/>
211 <output_collection name="catalog" type="list" count="3"/>
212 <output_collection name="matches" type="list" count="2"/>
213 <output_collection name="bams" type="list" count="2"/>
214 <output_collection name="gstacks_out" type="list" count="2"/>
215 </test>
216 </tests>
217
218 <help>
219 <![CDATA[
220 .. class:: infomark
221
222 **What it does**
223
224 This program runs each of the Stacks components in turn: first, ustacks is run on each of the specified samples, building loci and calling SNPs in each. Second, cstacks is run to create a catalog of all loci that were marked as 'parents' or 'samples' on the command line. Third, sstacks is executed to match each sample against the catalog. Finally, tsv2bam, gstacks and populations are run; their results are also part of the outputs of this tool. A bit more detail on this process can be found in the FAQ. The denovo_map.pl program will also load the results of each stage of the analysis (individual loci, the catalog, and matches against the catalog) into the database (although this can be disabled). After matching, the program will build a database index to speed up access (index_radtags.pl) and enable web-based filtering.
225
226 --------
227
228 **Input files**
229
230 FASTQ, FASTA
231
232 - Population map::
233
234 indv_01 1
235 indv_02 1
236 indv_03 1
237 indv_04 2
238 indv_05 2
239 indv_06 2
240
241
242 **Output files**
243
244
245 - XXX.tags.tsv file:
246
247 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
248
249 Notes: For the tags file, each stack will start in the file with a consensus sequence for the entire stack followed by the flags for that stack. Then, each individual read that was merged into that stack will follow. The next stack will start with another consensus sequence.
250
251
252 - XXX.snps.tsv file:
253
254 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
255
256 Notes: If a stack has two SNPs called within it, then there will be two lines in this file listing each one.
257
258
259 - XXX.alleles.tsv file:
260
261 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
262
263
264 - XXX.matches.tsv file:
265
266 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
267
268 Notes: Each line in this file records a match between a catalog locus and a locus in an individual, for a particular haplotype. The Batch ID plus the Catalog ID together represent a unique locus in the entire population, while the Sample ID and the Stack ID together represent a unique locus in an individual sample.
269
270
271 - other files:
272
273 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
274
275 @STACKS_INFOS@
276 ]]>
277 </help>
278 <expand macro="citation" />
279 </tool>