Mercurial > repos > nml > quasitools
comparison hydra.xml @ 0:1f1214983a1c draft default tip
planemo upload for repository https://github.com/phac-nml/quasitools commit 5a9e4c9a582828654893166caf20576f5e0c418e
| author | nml |
|---|---|
| date | Mon, 20 Jun 2022 20:05:57 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f1214983a1c |
|---|---|
| 1 <tool id="hydra" name="Hydra pipeline" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> | |
| 2 <description>Identifies drug resistance within an NGS dataset</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="requirements"/> | |
| 7 <command detect_errors="exit_code"><![CDATA[ | |
| 8 | |
| 9 quasitools hydra | |
| 10 | |
| 11 ## Positional read inputs: one file for single-end data, forward then reverse for paired data or a paired collection. | |
| 12 #if $data_type.type == "paired": | |
| 13 | |
| 14 '$data_type.fastq_input1' | |
| 15 '$data_type.fastq_input2' | |
| 16 | |
| 17 #elif $data_type.type == "collection": | |
| 18 | |
| 19 '$data_type.fastq_input1.forward' | |
| 20 '$data_type.fastq_input1.reverse' | |
| 21 | |
| 22 #elif $data_type.type == "single": | |
| 23 | |
| 24 '$data_type.fastq_input1' | |
| 25 | |
| 26 #end if | |
| 27 | |
| 28 #if $mutation_db: | |
| 29 -m '$mutation_db' | |
| 30 #end if | |
| 31 ## Optional numeric settings are tested with str(...) != "" so that an explicit 0 is still passed on instead of being treated as unset. | |
| 32 #if str($reporting_threshold) != "": | |
| 33 -rt '$reporting_threshold' | |
| 34 #end if | |
| 35 | |
| 36 #if str($consensus_pct) != "": | |
| 37 -cp '$consensus_pct' | |
| 38 #end if | |
| 39 | |
| 40 #if str($length_cutoff) != "": | |
| 41 -lc '$length_cutoff' | |
| 42 #end if | |
| 43 | |
| 44 #if str($score_cutoff) != "": | |
| 45 -sc '$score_cutoff' | |
| 46 #end if | |
| 47 | |
| 48 #if str($error_rate) != "": | |
| 49 -e '$error_rate' | |
| 50 #end if | |
| 51 | |
| 52 #if str($min_read_qual) != "": | |
| 53 -rq '$min_read_qual' | |
| 54 #end if | |
| 55 | |
| 56 #if str($min_variant_qual) != "": | |
| 57 -vq '$min_variant_qual' | |
| 58 #end if | |
| 59 | |
| 60 #if str($min_depth) != "": | |
| 61 -md '$min_depth' | |
| 62 #end if | |
| 63 | |
| 64 #if str($min_ac) != "": | |
| 65 -ma '$min_ac' | |
| 66 #end if | |
| 67 | |
| 68 #if str($min_freq) != "": | |
| 69 -mf '$min_freq' | |
| 70 #end if | |
| 71 ## Consensus options: with the default identifier the FASTA id is derived from the input dataset or collection name, otherwise the custom id is used. | |
| 72 #if $consensus.consensus_bool == "true_consensus": | |
| 73 --generate_consensus | |
| 74 #if $consensus.fasta_id.type == "default": | |
| 75 --id | |
| 76 #if $data_type.type == "paired": | |
| 77 '${data_type.fastq_input1.element_identifier}'_'${data_type.fastq_input2.element_identifier}' | |
| 78 #elif $data_type.type == "collection": | |
| 79 '${data_type.fastq_input1.name}' | |
| 80 #elif $data_type.type == "single": | |
| 81 '${data_type.fastq_input1.element_identifier}' | |
| 82 #end if | |
| 83 #elif $consensus.fasta_id.type == "custom": | |
| 84 --id '$consensus.fasta_id.custom_id' | |
| 85 #end if | |
| 86 #end if | |
| 87 #if $low_quality.qual_selector == "filter_ns": | |
| 88 --ns | |
| 89 #elif $low_quality.qual_selector == "mask_reads": | |
| 90 --mask_reads | |
| 91 #end if | |
| 92 | |
| 93 #if $score_type.score_selector == "median": | |
| 94 --median | |
| 95 #elif $score_type.score_selector == "mean": | |
| 96 --mean | |
| 97 #end if | |
| 98 | |
| 99 $trim_reads | |
| 100 | |
| 101 -o output | |
| 102 | |
| 103 ]]></command> | |
| 104 <inputs> <!-- Read inputs, optional numeric thresholds (each help text states the default used when the field is left empty), and the consensus, low-quality and score-type selectors. --> | |
| 105 <conditional name="data_type"> | |
| 106 <param name="type" type="select" label="Specify the read type."> | |
| 107 <option value="single">Single-end Data</option> | |
| 108 <option value="paired">Paired-end Data</option> | |
| 109 <option value="collection">Collection Paired-end Data</option> | |
| 110 </param> | |
| 111 <when value="single"> | |
| 112 <param name="fastq_input1" type="data" format="fastq" label="Single end read file(s)"/> | |
| 113 </when> | |
| 114 <when value="paired"> | |
| 115 <param name="fastq_input1" type="data" format="fastq" label="Forward paired-end read file"/> | |
| 116 <param name="fastq_input2" type="data" format="fastq" label="Reverse paired-end read file"/> | |
| 117 </when> | |
| 118 <when value="collection"> | |
| 119 <param name="fastq_input1" type="data_collection" label="Paired-end reads collection" optional="false" format="fastq" collection_type="paired" /> | |
| 120 </when> | |
| 121 </conditional> | |
| 122 <param name="mutation_db" type="data" format="tsv" optional="true" label="Mutation DB" help="Defaults to HIV mutation database." /> | |
| 123 <param name="reporting_threshold" type="integer" optional="true" min="1" max="100" value="1" label="Reporting threshold" help="Minimum mutation frequency to report. Defaults to 1." /> | |
| 124 <param name="consensus_pct" type="integer" optional="true" min="1" max="100" value="20" label="Consensus percentage" help="Minimum percentage a base needs to be incorporated into the consensus sequence. Defaults to 20." /> | |
| 125 <param name="length_cutoff" type="integer" optional="true" min="1" max="1000" label="Length cutoff" value="100" help="Reads which fall short of the specified length will be filtered out. Defaults to 100." /> | |
| 126 <param name="score_cutoff" type="integer" optional="true" min="0" max="40" label="Score cutoff" value="30" help="Reads whose median or mean quality score (depending on the score type specified) is less than the specified score cutoff value will be filtered out. Defaults to 30." /> | |
| 127 <param name="error_rate" type="float" optional="true" min="0" max="1" label="Error rate" value="0.0021" help="Estimated sequencing error rate. Defaults to 0.0021."/> | |
| 128 <param name="min_variant_qual" type="integer" optional="true" min="1" max="100" label="Minimum variant quality" value="30" help="Minimum required quality for variant to be considered later on in the pipeline. Defaults to 30." /> | |
| 129 <param name="min_read_qual" type="integer" optional="true" min="1" max="100" label="Minimum read quality" value="30" help="Minimum required quality for a position in a read not to be masked, if masking is enabled. Defaults to 30." /> | |
| 130 <param name="min_depth" type="integer" optional="true" min="0" max="5000" label="Minimum depth" value="100" help="Minimum required depth for variant to be considered later on in the pipeline. Defaults to 100." /> | |
| 131 <param name="min_ac" type="integer" optional="true" min="0" max="5000" label="Minimum allele count" value="5" help="Minimum required allele count for variant to be considered later on in the pipeline. Defaults to 5." /> | |
| 132 <param name="min_freq" type="float" optional="true" min="0" max="1" label="Minimum frequency" value="0.01" help="Minimum required frequency for variant to be considered later on in the pipeline. Defaults to 0.01." /> | |
| 133 <param name="trim_reads" type="boolean" optional="true" checked="false" truevalue="-tr" falsevalue="" label="Trim reads" help="Iteratively trim reads based on filter values if enabled." /> | |
| 134 <conditional name="consensus"> | |
| 135 <param name="consensus_bool" type="select" label="Generate consensus sequence." multiple="false" display="radio"> | |
| 136 <option value="true_consensus">True</option> | |
| 137 <option selected="true" value="false_consensus">False</option> | |
| 138 </param> | |
| 139 <when value="true_consensus"> | |
| 140 <conditional name="fasta_id"> | |
| 141 <param name="type" type="select" label="Specify consensus fasta identifier" multiple="false" display="radio"> | |
| 142 <option value="default" >Use fasta dataset name</option> | |
| 143 <option value="custom">Use custom name</option> | |
| 144 </param> | |
| 145 <when value="default"> | |
| 146 </when> | |
| 147 <when value="custom"> | |
| 148 <param name="custom_id" type="text" optional="false" value="custom_id" label="Fasta identifier" help="Type in a fasta identifier."/> | |
| 149 </when> | |
| 150 </conditional> | |
| 151 </when> | |
| 152 <when value="false_consensus"> | |
| 153 </when> | |
| 154 </conditional> | |
| 155 <conditional name="low_quality"> | |
| 156 <param name="qual_selector" type="select" label="Filter out regions masked, or mask low coverage regions with n's." multiple="false" display="radio"> | |
| 157 <option value="filter_ns">Filter out regions with n's</option> | |
| 158 <option value="mask_reads">Mask low coverage regions with n's</option> | |
| 159 <option value="neither" selected="true">Do not filter or mask low coverage regions.</option> | |
| 160 </param> | |
| 161 <when value="filter_ns"> | |
| 162 </when> | |
| 163 <when value="mask_reads"> | |
| 164 </when> | |
| 165 <when value="neither"> | |
| 166 </when> | |
| 167 </conditional> | |
| 168 <conditional name="score_type"> | |
| 169 <param name="score_selector" type="select" label="Use either median score (default) or mean score for the score cutoff value." multiple="false" display="radio"> | |
| 170 <option value="median" selected="true">Use median score</option> | |
| 171 <option value="mean">Use mean score</option> | |
| 172 </param> | |
| 173 <when value="median"> | |
| 174 </when> | |
| 175 <when value="mean"> | |
| 176 </when> | |
| 177 </conditional> | |
| 178 </inputs> | |
| 179 <outputs> <!-- Each dataset is collected from the "output" directory passed to quasitools via -o; the consensus FASTA is only kept when consensus generation is selected. --> | |
| 180 <data name="output_bam" format="bam" from_work_dir="output/align.bam" label="HyDRA: alignment bam output"/> | |
| 181 <data name="output_coverage" format="csv" from_work_dir="output/coverage_file.csv" label="HyDRA: coverage output"/> | |
| 182 <data name="output_dr" format="csv" from_work_dir="output/dr_report.csv" label="HyDRA: drug resistance output"/> | |
| 183 <data name="output_filtered" format="fastq" from_work_dir="output/filtered.fastq" label="HyDRA: filtered reads output"/> | |
| 184 <data name="output_hydra" format="vcf" from_work_dir="output/hydra.vcf" label="HyDRA: variants output"/> | |
| 185 <data name="output_aa_mt" format="txt" from_work_dir="output/mutation_report.aavf" label="HyDRA: aa mutations output"/> | |
| 186 <data name="output_stats" format="txt" from_work_dir="output/stats.txt" label="HyDRA: stats output"/> | |
| 187 <data name="output_consensus" format="fasta" from_work_dir="output/consensus.fasta" label="HyDRA: consensus output"> | |
| 188 <filter>consensus['consensus_bool'] == "true_consensus"</filter> | |
| 189 </data> | |
| 190 </outputs> | |
| 191 <tests> <!-- Single-end run scored by mean quality; literal dots in the has_text_matching expressions are escaped so they match only a real '.' rather than any character. --> | |
| 192 <test> | |
| 193 <param name="type" value="single"/> | |
| 194 <param name="fastq_input1" value="forward.fastq" /> | |
| 195 <param name="score_selector" value="mean" /> | |
| 196 <output name="output_coverage"> | |
| 197 <assert_contents> | |
| 198 <has_text text="frame: 0" /> | |
| 199 <has_text text="1,0" /> | |
| 200 <has_text text="948,0" /> | |
| 201 </assert_contents> | |
| 202 </output> | |
| 203 <output name="output_dr"> | |
| 204 <assert_contents> | |
| 205 <has_text text="Chromosome,Gene,Category,Surveillance,Wildtype,Position,Mutation,Mutation Frequency,Coverage" /> | |
| 206 <has_text text="hxb2_pol,RT,NNRTI,Yes,K,101,P,14.23,1574" /> | |
| 207 <has_text text="hxb2_pol,RT,NNRTI,Yes,K,103,N,5.49,1912" /> | |
| 208 <has_text text="hxb2_pol,RT,NNRTI,Yes,Y,181,C,24.07,4557" /> | |
| 209 <has_text text="hxb2_pol,RT,NNRTI,Yes,Y,181,I,18.04,4557" /> | |
| 210 <has_text text="hxb2_pol,RT,NNRTI,Yes,Y,181,V,20.08,4557" /> | |
| 211 <has_text text="hxb2_pol,RT,NNRTI,Yes,Y,188,C,2.81,3454" /> | |
| 212 <has_text text="hxb2_pol,RT,NNRTI,Yes,G,190,A,5.20,3233" /> | |
| 213 <has_text text="hxb2_pol,RT,NNRTI,Yes,G,190,S,6.68,3233" /> | |
| 214 </assert_contents> | |
| 215 </output> | |
| 216 <output name="output_hydra"> | |
| 217 <assert_contents> | |
| 218 <has_text_matching expression="#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"/> | |
| 219 <has_text_matching expression="hxb2_pol\s576\s\.\sa\sg\s100\sPASS\sDP=805;AC=245;AF=0\.3043" /> | |
| 220 <has_text_matching expression="hxb2_pol\s958\s\.\sc\sa\s100\sPASS\sDP=2503;AC=28;AF=0\.0112" /> | |
| 221 </assert_contents> | |
| 222 </output> | |
| 223 <output name="output_aa_mt"> | |
| 224 <assert_contents> | |
| 225 <has_text_matching expression="#CHROM\tGENE\tPOS\tREF\tALT\tFILTER\tALT_FREQ\tCOVERAGE\tINFO"/> | |
| 226 <has_text_matching expression="hxb2_pol\tRT\t101\tK\tP\tPASS\t0\.1423\t1574\tRC=aaa;AC=CCa;ACF=0\.1423;CAT=NNRTI;SRVL=Yes" /> | |
| 227 <has_text_matching expression="hxb2_pol\tRT\t101\tK\tT\tmf0\.01\t0\.0013\t1574\tRC=aaa;AC=aCa;ACF=0\.0013;CAT=\.;SRVL=\." /> | |
| 228 </assert_contents> | |
| 229 </output> | |
| 230 | |
| 231 <output name="output_stats"> | |
| 232 <assert_contents> | |
| 233 <has_text text="Input Size: 25000"/> | |
| 234 <has_text text="Number of reads filtered due to length: 15074"/> | |
| 235 <has_text text="Number of reads filtered due to average quality score: 501"/> | |
| 236 <has_text text="Number of reads filtered due to presence of Ns: 0"/> | |
| 237 <has_text text="Number of reads filtered due to excess coverage: 0"/> | |
| 238 <has_text text="Number of reads filtered due to poor mapping: 12"/> | |
| 239 <has_text text="Percentage of reads filtered: 62.35"/> | |
| 240 </assert_contents> | |
| 241 </output> | |
| 242 </test> | |
| 243 </tests> | |
| 244 <help><![CDATA[ | |
| 245 | |
| 246 HyDRA - HIV Drug Resistance Analyzer | |
| 247 ==================================== | |
| 248 | |
| 249 The HyDRA pipeline provides a pipeline for identifying drug resistance within a Next Generation Sequencing dataset. The pipeline takes as input the raw reads produced by a Next Generation Sequencer and produces a report detailing found drug resistance per sample. | |
| 250 | |
| 251 Authors | |
| 252 ------- | |
| 253 | |
| 254 The HyDRA pipeline was developed by Eric Enns and David Peddle. | |
| 255 | |
| 256 Stages | |
| 257 ------ | |
| 258 | |
| 259 The HyDRA pipeline proceeds through the following stages: | |
| 260 | |
| 261 1. Quality Control/Filtering | |
| 262 2. Reference mapping using bowtie2. | |
| 263 3. Variant Calling and filtering using a Poisson distribution. | |
| 264 4. AA Mutation Calling and filtering. | |
| 265 5. Drug Resistance report generation. | |
| 266 | |
| 267 Details | |
| 268 ------- | |
| 269 | |
| 270 The following is an example for running the pipeline, using our included test dataset: | |
| 271 * Output directory name: "/tmp/hydra_out" | |
| 272 * Forward reads: "reads_w_K103N.fastq" | |
| 273 | |
| 274 ### Output ### | |
| 275 | |
| 276 The detailed output directory tree looks as follows: | |
| 277 | |
| 278 /tmp/hydra_out/ | |
| 279 * align.bam | |
| 280 * coverage_file.csv | |
| 281 * dr_report.csv | |
| 282 * filtered.fastq | |
| 283 * hydra.vcf | |
| 284 * mutation_report.aavf | |
| 285 * stats.txt | |
| 286 | |
| 287 The description of each of these directories/files is as follows: | |
| 288 | |
| 289 * __run.conf__: The configuration used when this output was produced. | |
| 290 * __reads_w_K103N/__: The results directory for the input file reads_w_K103N.fastq | |
| 291 * __align.bam__: The alignment file in bam format. | |
| 292 * __coverage_file.csv__: A file with one entry per line with the AA position and the coverage at the position. | |
| 293 * __dr_report.csv__: A report detailing the drug resistant mutations found, above the reporting threshold (default: 1%). | |
| 294 * __filtered.fastq__: The reads remaining after the filtering stage. | |
| 295 * __hydra.vcf__: The variants found by the pipeline. | |
| 296 * __mutation_report.aavf__: The AA mutations found by the pipeline. | |
| 297 * __stats.txt__: A log file detailing size after filtering and major stages. | |
| 298 | |
| 299 The __dr_report.csv__ file lists all found drug resistant mutations (mutations included in the mutation database) which have frequency greater than the reporting threshold. An example of this file is given below. | |
| 300 | |
| 301 Example: __dr_report.csv__ | |
| 302 | |
| 303 Gene,Category,Surveillance,Wildtype,Position,Mutation,Mutation Frequency,Coverage | |
| 304 RT,NNRTI,Yes,K,103,N,9.03,155 | |
| 305 | |
| 306 The __mutation_report.aavf__ file is in AAVF format (https://github.com/winhiv/aavf-spec), an amino acid variant format inspired by the VCF format. The __mutation_report.aavf__ file details all of the AA mutations found by the pipeline. An example of this file is given below. | |
| 307 | |
| 308 Example: __mutation_report.aavf__ | |
| 309 | |
| 310 ##fileformat=AAVFv1.0 | |
| 311 ##fileDate=20220615 | |
| 312 ##source=quasitools:hydra | |
| 313 ##reference=hxb2_pol.fas | |
| 314 ##INFO=<ID=RC,Number=1,Type=String,Description="Reference Codon"> | |
| 315 ##INFO=<ID=AC,Number=.,Type=String,Description="Alternate Codon"> | |
| 316 ##INFO=<ID=ACF,Number=.,Type=Float,Description="Alternate Codon Frequency,for each Alternate Codon,in the same order aslisted."> | |
| 317 ##INFO=<ID=CAT,Number=.,Type=String,Description="Drug Resistance Category"> | |
| 318 ##INFO=<ID=SRVL,Number=.,Type=String,Description="Drug Resistance Surveillance"> | |
| 319 ##FILTER=<ID=af0.01,Description="Set if True; alt_freq<0.01"> | |
| 320 #CHROM GENE POS REF ALT FILTER ALT_FREQ COVERAGE INFO | |
| 321 hxb2_pol RT 101 K P PASS 0.1423 1574 RC=aaa;AC=CCa;ACF=0.1423;CAT=NNRTI;SRVL=Yes | |
| 322 | |
| 323 ]]></help> | |
| 324 <expand macro="citations" /> | |
| 325 </tool> |
