comparison hydra.xml @ 0:1f1214983a1c draft default tip

planemo upload for repository https://github.com/phac-nml/quasitools commit 5a9e4c9a582828654893166caf20576f5e0c418e
author nml
date Mon, 20 Jun 2022 20:05:57 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f1214983a1c
1 <tool id="hydra" name="Hydra pipeline" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> <!-- NOTE(review): @TOOL_VERSION@ and @VERSION_SUFFIX@ are placeholder tokens that are not defined in this file; presumably they are supplied by macros.xml (confirm). -->
2 <description>Identifies drug resistance within an NGS dataset</description>
3 <macros> <!-- Imports shared definitions; the "requirements" and "citations" macros expanded in this file are not defined here, so they presumably live in macros.xml (confirm). -->
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <command detect_errors="exit_code"><![CDATA[
8 ## Everything below is appended to this single quasitools invocation.
9 quasitools hydra
10
11 ## Positional read files, chosen by the selected read type.
12 #if $data_type.type == "single":
13 ## One single-end dataset.
14 '${data_type.fastq_input1}'
15
16 #elif $data_type.type == "paired":
17 ## Two separately selected datasets, forward first.
18 '${data_type.fastq_input1}'
19 '${data_type.fastq_input2}'
20
21 #elif $data_type.type == "collection":
22 ## Both mates are taken from one paired collection, forward first.
23 '${data_type.fastq_input1.forward}'
24 '${data_type.fastq_input1.reverse}'
25
26 #end if
27
28 #if $mutation_db:
29 -m '$mutation_db'
30 #end if
31 ## The numeric options below are optional. They are compared as text against the empty string instead of being tested for truthiness, so that an explicit 0 (allowed for several of them) is still passed on rather than silently replaced by the quasitools default.
32 #if str($reporting_threshold) != "":
33 -rt '$reporting_threshold'
34 #end if
35
36 #if str($consensus_pct) != "":
37 -cp '$consensus_pct'
38 #end if
39
40 #if str($length_cutoff) != "":
41 -lc '$length_cutoff'
42 #end if
43
44 #if str($score_cutoff) != "":
45 -sc '$score_cutoff'
46 #end if
47
48 #if str($error_rate) != "":
49 -e '$error_rate'
50 #end if
51
52 #if str($min_read_qual) != "":
53 -rq '$min_read_qual'
54 #end if
55
56 #if str($min_variant_qual) != "":
57 -vq '$min_variant_qual'
58 #end if
59
60 #if str($min_depth) != "":
61 -md '$min_depth'
62 #end if
63
64 #if str($min_ac) != "":
65 -ma '$min_ac'
66 #end if
67
68 #if str($min_freq) != "":
69 -mf '$min_freq'
70 #end if
71
72 #if $consensus.consensus_bool == "true_consensus":
73 --generate_consensus
74 ## Every branch supplies a value after --id; a bare --id would consume the next option as the identifier (this used to happen for collection input with the default identifier).
75 ## Inputs are referenced through their enclosing data_type conditional rather than by unqualified name.
76 #if $consensus.fasta_id.type == "custom":
77 --id '$consensus.fasta_id.custom_id'
78 #elif $data_type.type == "paired":
79 --id '${data_type.fastq_input1.element_identifier}'_'${data_type.fastq_input2.element_identifier}'
80 #elif $data_type.type == "collection":
81 --id '${data_type.fastq_input1.name}'
82 #elif $data_type.type == "single":
83 --id '${data_type.fastq_input1.element_identifier}'
84 #end if
85 #end if
86
87 #if $low_quality.qual_selector == "mask_reads":
88 --mask_reads
89 #elif $low_quality.qual_selector == "filter_ns":
90 --ns
91 #end if
92 ## The "neither" choice adds no flag. Next: which read score statistic to use.
93 #if $score_type.score_selector == "mean":
94 --mean
95 #elif $score_type.score_selector == "median":
96 --median
97 #end if
98 ## The boolean renders as -tr when enabled and as an empty string otherwise.
99 ${trim_reads}
100 ## Results are written to ./output, where the outputs section picks them up via from_work_dir.
101 -o output
102
103 ]]></command>
104 <inputs>
105 <conditional name="data_type"> <!-- Selects how the reads are supplied; the command template branches on the same three values. -->
106 <param type="select" name="type" label="Specify the read type.">
107 <option value="single">Single-end Data</option>
108 <option value="paired">Paired-end Data</option>
109 <option value="collection">Collection Paired-end Data</option>
110 </param>
111 <when value="single">
112 <param type="data" name="fastq_input1" format="fastq" label="Single end read file(s)"/>
113 </when>
114 <when value="paired">
115 <param type="data" name="fastq_input1" format="fastq" label="Forward paired-end read file"/>
116 <param type="data" name="fastq_input2" format="fastq" label="Reverse paired-end read file"/>
117 </when>
118 <when value="collection">
119 <param type="data_collection" name="fastq_input1" collection_type="paired" format="fastq" optional="false" label="Paired-end reads collection"/>
120 </when>
121 </conditional>
122 <param name="mutation_db" type="data" format="tsv" optional="true" label="Mutation DB" help="Defaults to HIV mutation database." />
123 <param name="reporting_threshold" type="integer" optional="true" min="1" max="100" value="1" label="Reporting threshold" help="Minimum mutation frequency (percent) to report. Defaults to 1." /> <!-- Default moved out of the label into the help text, matching the other parameters. -->
124 <param name="consensus_pct" type="integer" optional="true" min="1" max="100" value="20" label="Consensus percentage" help="Minimum percentage a base needs to be incorporated into the consensus sequence. Defaults to 20." /> <!-- Help was a copy of the reporting threshold text, and the old max of 20 equalled the default so the value could never be raised. NOTE(review): upper bound 100 assumes the quasitools option accepts 1 to 100; confirm. -->
125 <param name="length_cutoff" type="integer" optional="true" min="1" max="1000" label="Length cutoff" value="100" help="Reads which fall short of the specified length will be filtered out. Defaults to 100." />
126 <param name="score_cutoff" type="integer" optional="true" min="0" max="40" label="Score cutoff" value="30" help="Reads whose median or mean quality score (depending on the score type specified) is less than the specified score cutoff value will be filtered out. Defaults to 30." />
127 <param name="error_rate" type="float" optional="true" min="0" max="1" label="Error rate" value="0.0021" help="Estimated sequencing error rate. Defaults to 0.0021."/>
128 <param name="min_variant_qual" type="integer" optional="true" min="1" max="100" label="Minimum variant quality" value="30" help="Minimum required quality for variant to be considered later on in the pipeline. Defaults to 30." /> <!-- Label made distinct from the read quality parameter below; both used to read "Minimum quality". -->
129 <param name="min_read_qual" type="integer" optional="true" min="1" max="100" label="Minimum read quality" value="30" help="Minimum required quality for a position in a read not to be masked, if masking is enabled. Defaults to 30." />
130 <param name="min_depth" type="integer" optional="true" min="0" max="5000" label="Minimum depth" value="100" help="Minimum required depth for variant to be considered later on in the pipeline. Defaults to 100." />
131 <param name="min_ac" type="integer" optional="true" min="0" max="5000" label="Minimum allele count" value="5" help="Minimum required allele count for variant to be considered later on in the pipeline. Defaults to 5." />
132 <param name="min_freq" type="float" optional="true" min="0" max="1" label="Minimum frequency" value="0.01" help="Minimum required frequency for variant to be considered later on in the pipeline. Defaults to 0.01." />
133 <param name="trim_reads" type="boolean" optional="true" checked="false" truevalue="-tr" falsevalue="" label="Trim reads" help="Iteratively trim reads based on filter values if enabled." />
134 <conditional name="consensus">
135 <param name="consensus_bool" type="select" display="radio" multiple="false" label="Generate consensus sequence.">
136 <option value="true_consensus">True</option>
137 <option value="false_consensus" selected="true">False</option>
138 </param>
139 <!-- Nothing further to configure when no consensus is requested. -->
140 <when value="false_consensus"/>
141 <when value="true_consensus">
142 <!-- Chooses where the identifier passed along with the consensus request comes from. -->
143 <conditional name="fasta_id">
144 <param name="type" type="select" display="radio" multiple="false" label="Specify consensus fasta identifier">
145 <option value="default">Use fasta dataset name</option>
146 <option value="custom">Use custom name</option>
147 </param>
148 <when value="default"/>
149 <when value="custom">
150 <param name="custom_id" type="text" value="custom_id" optional="false" label="Fasta identifier" help="Type in a fasta identifier."/>
151 </when>
152 </conditional>
153 </when>
154 </conditional>
155 <conditional name="low_quality">
156 <!-- Chooses between dropping reads that contain n's, masking low quality read positions, or doing neither. -->
157 <!-- Labels now speak of read quality instead of "low coverage regions": masking is governed by the minimum read quality parameter, and the stats report counts reads filtered for Ns. NOTE(review): confirm the wording against the quasitools option help. -->
158 <param name="qual_selector" type="select" label="Filter out reads containing n's, or mask low quality positions in reads with n's." multiple="false" display="radio">
159 <option value="filter_ns">Filter out reads containing n's</option>
160 <option value="mask_reads">Mask low quality positions in reads with n's</option>
161 <option value="neither" selected="true">Do not filter or mask reads.</option>
162 </param>
163 <!-- None of the choices takes further parameters. -->
164 <when value="filter_ns"/>
165 <when value="mask_reads"/>
166 <when value="neither"/>
167 </conditional>
168 <conditional name="score_type">
169 <!-- Picks the per-read statistic that the score cutoff is compared against. -->
170 <param name="score_selector" type="select" display="radio" multiple="false" label="Use either median score (default) or mean score for the score cutoff value.">
171 <option value="median" selected="true">Use median score</option>
172 <option value="mean">Use mean score</option>
173 </param>
174 <!-- Neither choice takes further parameters. -->
175 <when value="median"/>
176 <when value="mean"/>
177 </conditional>
178 </inputs>
179 <outputs> <!-- Each dataset is collected from the "output" directory that the command passes to -o. -->
180 <data name="output_bam" format="bam" from_work_dir="output/align.bam" label="HyDRA: alignment bam output"/>
181 <data name="output_coverage" format="csv" from_work_dir="output/coverage_file.csv" label="HyDRA: coverage output"/>
182 <data name="output_dr" format="csv" from_work_dir="output/dr_report.csv" label="HyDRA: drug resistance output"/>
183 <data name="output_filtered" format="fastq" from_work_dir="output/filtered.fastq" label="HyDRA: filtered reads output"/>
184 <data name="output_hydra" format="vcf" from_work_dir="output/hydra.vcf" label="HyDRA: variants output"/>
185 <data name="output_aa_mt" format="txt" from_work_dir="output/mutation_report.aavf" label="HyDRA: aa mutations output"/>
186 <data name="output_stats" format="txt" from_work_dir="output/stats.txt" label="HyDRA: stats output"/>
187 <data name="output_consensus" format="fasta" from_work_dir="output/consensus.fasta" label="HyDRA: consensus output">
188 <filter>consensus['consensus_bool'] == "true_consensus"</filter> <!-- Only kept when consensus generation was requested. -->
189 </data>
190 </outputs>
191 <tests>
192 <test> <!-- Single-end run using the mean read score; parameters are qualified with their conditional because "type" also exists inside consensus/fasta_id. -->
193 <param name="data_type|type" value="single"/>
194 <param name="data_type|fastq_input1" value="forward.fastq" />
195 <param name="score_type|score_selector" value="mean" />
196 <output name="output_coverage">
197 <assert_contents>
198 <has_text text="frame: 0" />
199 <has_text text="1,0" />
200 <has_text text="948,0" />
201 </assert_contents>
202 </output>
203 <output name="output_dr">
204 <assert_contents>
205 <has_text text="Chromosome,Gene,Category,Surveillance,Wildtype,Position,Mutation,Mutation Frequency,Coverage" />
206 <has_text text="hxb2_pol,RT,NNRTI,Yes,K,101,P,14.23,1574" />
207 <has_text text="hxb2_pol,RT,NNRTI,Yes,K,103,N,5.49,1912" />
208 <has_text text="hxb2_pol,RT,NNRTI,Yes,Y,181,C,24.07,4557" />
209 <has_text text="hxb2_pol,RT,NNRTI,Yes,Y,181,I,18.04,4557" />
210 <has_text text="hxb2_pol,RT,NNRTI,Yes,Y,181,V,20.08,4557" />
211 <has_text text="hxb2_pol,RT,NNRTI,Yes,Y,188,C,2.81,3454" />
212 <has_text text="hxb2_pol,RT,NNRTI,Yes,G,190,A,5.20,3233" />
213 <has_text text="hxb2_pol,RT,NNRTI,Yes,G,190,S,6.68,3233" />
214 </assert_contents>
215 </output>
216 <output name="output_hydra">
217 <assert_contents>
218 <has_text_matching expression="#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"/>
219 <has_text_matching expression="hxb2_pol\s576\s.\sa\sg\s100\sPASS\sDP=805;AC=245;AF=0.3043" />
220 <has_text_matching expression="hxb2_pol\s958\s.\sc\sa\s100\sPASS\sDP=2503;AC=28;AF=0.0112" />
221 </assert_contents>
222 </output>
223 <output name="output_aa_mt">
224 <assert_contents>
225 <has_text_matching expression="#CHROM\tGENE\tPOS\tREF\tALT\tFILTER\tALT_FREQ\tCOVERAGE\tINFO"/>
226 <has_text_matching expression="hxb2_pol\tRT\t101\tK\tP\tPASS\t0.1423\t1574\tRC=aaa;AC=CCa;ACF=0.1423;CAT=NNRTI;SRVL=Yes" />
227 <has_text_matching expression="hxb2_pol\tRT\t101\tK\tT\tmf0.01\t0.0013\t1574\tRC=aaa;AC=aCa;ACF=0.0013;CAT=.;SRVL=." />
228 </assert_contents>
229 </output>
230 <!-- Read filtering summary. -->
231 <output name="output_stats">
232 <assert_contents>
233 <has_text text="Input Size: 25000"/>
234 <has_text text="Number of reads filtered due to length: 15074"/>
235 <has_text text="Number of reads filtered due to average quality score: 501"/>
236 <has_text text="Number of reads filtered due to presence of Ns: 0"/>
237 <has_text text="Number of reads filtered due to excess coverage: 0"/>
238 <has_text text="Number of reads filtered due to poor mapping: 12"/>
239 <has_text text="Percentage of reads filtered: 62.35"/>
240 </assert_contents>
241 </output>
242 </test>
243 </tests>
244 <help><![CDATA[
245
246 HyDRA - HIV Drug Resistance Analyzer
247 ====================================
248
249 The HyDRA pipeline identifies drug resistance within a Next Generation Sequencing dataset. It takes as input the raw reads produced by a Next Generation Sequencer and produces a report detailing the drug resistance found per sample.
250
251 Authors
252 -------
253
254 The HyDRA pipeline was developed by Eric Enns and David Peddle.
255
256 Stages
257 ------
258
259 The HyDRA pipeline proceeds through the following stages:
260
261 1. Quality Control/Filtering
262 2. Reference mapping using bowtie2.
263 3. Variant Calling and filtering using a Poisson distribution.
264 4. AA Mutation Calling and filtering.
265 5. Drug Resistance report generation.
266
267 Details
268 -------
269
270 The following is an example for running the pipeline, using our included test dataset:
271 * Output directory name: "/tmp/hydra_out"
272 * Forward reads: "reads_w_K103N.fastq"
273
274 ### Output ###
275
276 The detailed output directory tree looks as follows:
277
278 /tmp/hydra_out/
279 * align.bam
280 * coverage_file.csv
281 * dr_report.csv
282 * filtered.fastq
283 * hydra.vcf
284 * mutation_report.aavf
285 * stats.txt
286
287 The descriptions of these directories/files are as follows:
288
289 * __run.conf__: The configuration used when this output was produced.
290 * __reads_w_K103N/__: The results directory for the input file reads_w_K103N.fastq
291 * __align.bam__: The alignment file in bam format.
292 * __coverage_file.csv__: A file with one entry per line with the AA position and the coverage at the position.
293 * __dr_report.csv__: A report detailing the drug resistant mutations found, above the reporting threshold (default: 1%).
294 * __filtered.fastq__: The reads remaining after the filtering stage.
295 * __hydra.vcf__: The variants found by the pipeline.
296 * __mutation_report.aavf__: The AA mutations found by the pipeline.
297 * __stats.txt__: A log file detailing size after filtering and major stages.
298
299 The __dr_report.csv__ file lists all found drug resistant mutations (mutations included in the mutation database) which have frequency greater than the reporting threshold. An example of this file is given below.
300
301 Example: __dr_report.csv__
302
303 Chromosome,Gene,Category,Surveillance,Wildtype,Position,Mutation,Mutation Frequency,Coverage
304 hxb2_pol,RT,NNRTI,Yes,K,103,N,9.03,155
305
306 The __mutation_report.aavf__ file is in AAVF format (https://github.com/winhiv/aavf-spec), an amino acid variant format inspired by the VCF format. The __mutation_report.aavf__ file details all of the AA mutations found by the pipeline. An example of this file is given below.
307
308 Example: __mutation_report.aavf__
309
310 ##fileformat=AAVFv1.0
311 ##fileDate=20220615
312 ##source=quasitools:hydra
313 ##reference=hxb2_pol.fas
314 ##INFO=<ID=RC,Number=1,Type=String,Description="Reference Codon">
315 ##INFO=<ID=AC,Number=.,Type=String,Description="Alternate Codon">
316 ##INFO=<ID=ACF,Number=.,Type=Float,Description="Alternate Codon Frequency,for each Alternate Codon,in the same order aslisted.">
317 ##INFO=<ID=CAT,Number=.,Type=String,Description="Drug Resistance Category">
318 ##INFO=<ID=SRVL,Number=.,Type=String,Description="Drug Resistance Surveillance">
319 ##FILTER=<ID=af0.01,Description="Set if True; alt_freq<0.01">
320 #CHROM GENE POS REF ALT FILTER ALT_FREQ COVERAGE INFO
321 hxb2_pol RT 101 K P PASS 0.1423 1574 RC=aaa;AC=CCa;ACF=0.1423;CAT=NNRTI;SRVL=Yes
322
323 ]]></help>
324 <expand macro="citations" /> <!-- Expands the "citations" macro, which is not defined in this file; presumably provided by macros.xml (confirm). -->
325 </tool>