Mercurial > repos > oinizan > frogs
comparison preprocess.xml @ 7:76dcbe930b1d draft
"planemo upload for repository https://github.com/geraldinepascal/FROGS-wrappers/ commit 0a8dfe386b79711c479cf8a2bc8e9677e521b9e5-dirty"
| author | oinizan |
|---|---|
| date | Wed, 18 Aug 2021 15:43:00 +0000 |
| parents | 192cac570229 |
| children | 7bf54edaba24 |
comparison
equal
deleted
inserted
replaced
| 6:192cac570229 | 7:76dcbe930b1d |
|---|---|
| 13 # GNU General Public License for more details. | 13 # GNU General Public License for more details. |
| 14 # | 14 # |
| 15 # You should have received a copy of the GNU General Public License | 15 # You should have received a copy of the GNU General Public License |
| 16 # along with this program. If not, see <http://www.gnu.org/licenses/>. | 16 # along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 17 --> | 17 --> |
| 18 <tool id="FROGS_preprocess" name="FROGS Pre-process" version="3.2.3.1"> | 18 <tool id="FROGS_preprocess" name="FROGS Pre-process" version="@TOOL_VERSION@+galaxy2"> |
| 19 <description>merging, denoising and dereplication.</description> | 19 <description>merging, denoising and dereplication.</description> |
| 20 <requirements> | 20 |
| 21 <requirement type="package" version="3.2.3">frogs</requirement> | 21 <macros> |
| 22 <requirement type="package" version="2.17.0">vsearch</requirement> | 22 <import>macros.xml</import> |
| 23 <requirement type="package" version="1.2.11">flash</requirement> | 23 </macros> |
| 24 <requirement type="package" version="2.10">cutadapt</requirement> | 24 |
| 25 </requirements> | 25 <expand macro="requirements" > |
| 26 <requirement type="package" version="2.17.0">vsearch</requirement> | |
| 27 <requirement type="package" version="1.2.11">flash</requirement> | |
| 28 <requirement type="package" version="2.10">cutadapt</requirement> | |
| 29 </expand> | |
| 30 | |
| 26 <stdio> | 31 <stdio> |
| 27 <exit_code range="1:" /> | 32 <exit_code range="1:" /> |
| 28 <exit_code range=":-1" /> | 33 <exit_code range=":-1" /> |
| 29 </stdio> | 34 </stdio> |
| 30 <command> | 35 <command> |
| 31 preprocess.py $sequencer_type.sequencer_selected | 36 preprocess.py '$sequencer_type.sequencer_selected' |
| 32 --output-dereplicated $dereplicated_file --output-count $count_file --summary $summary_file | 37 --output-dereplicated '$dereplicated_file' --output-count '$count_file' --summary '$summary_file' |
| 33 --nb-cpus \${GALAXY_SLOTS:-1} | 38 --nb-cpus \${GALAXY_SLOTS:-1} |
| 34 --min-amplicon-size $sequencer_type.min_amplicon_size --max-amplicon-size $sequencer_type.max_amplicon_size | 39 --min-amplicon-size $sequencer_type.min_amplicon_size --max-amplicon-size $sequencer_type.max_amplicon_size |
| 35 | 40 |
| 36 #if $sequencer_type.sequencer_selected == "illumina" | 41 #if $sequencer_type.sequencer_selected == "illumina" |
| 37 #if $sequencer_type.sequencing_protocol.sequencing_protocol_selected == "standard" | 42 #if $sequencer_type.sequencing_protocol.sequencing_protocol_selected == "standard" |
| 38 --five-prim-primer $sequencer_type.sequencing_protocol.five_prim_primer --three-prim-primer $sequencer_type.sequencing_protocol.three_prim_primer | 43 --five-prim-primer '$sequencer_type.sequencing_protocol.five_prim_primer' --three-prim-primer '$sequencer_type.sequencing_protocol.three_prim_primer' |
| 39 #else | 44 #else |
| 40 --without-primers | 45 --without-primers |
| 41 #end if | 46 #end if |
| 42 #else | 47 #else |
| 43 --five-prim-primer $sequencer_type.five_prim_primer --three-prim-primer $sequencer_type.three_prim_primer | 48 --five-prim-primer '$sequencer_type.five_prim_primer' --three-prim-primer '$sequencer_type.three_prim_primer' |
| 44 #end if | 49 #end if |
| 45 | 50 |
| 46 #if $sequencer_type.input_type.input_type_selected == "archive" | 51 #if $sequencer_type.input_type.input_type_selected == "archive" |
| 47 --input-archive $sequencer_type.input_type.archive_file | 52 --input-archive '$sequencer_type.input_type.archive_file' |
| 48 #if $sequencer_type.sequencer_selected == "illumina" and $sequencer_type.input_type.archive_type.archive_type_selected == "already_merged" | 53 #if $sequencer_type.sequencer_selected == "illumina" and $sequencer_type.input_type.archive_type.archive_type_selected == "already_merged" |
| 49 --already-contiged | 54 --already-contiged |
| 50 #elif $sequencer_type.sequencer_selected == "illumina" | 55 #elif $sequencer_type.sequencer_selected == "illumina" |
| 51 --R1-size $sequencer_type.input_type.archive_type.R1_size --R2-size $sequencer_type.input_type.archive_type.R2_size | 56 --R1-size $sequencer_type.input_type.archive_type.R1_size --R2-size $sequencer_type.input_type.archive_type.R2_size |
| 52 --mismatch-rate $sequencer_type.input_type.archive_type.mm_rate | 57 --mismatch-rate $sequencer_type.input_type.archive_type.mm_rate |
| 53 --merge-software $sequencer_type.input_type.archive_type.merge_software_type.merge_software_selected | 58 --merge-software '$sequencer_type.input_type.archive_type.merge_software_type.merge_software_selected' |
| 54 #if $sequencer_type.input_type.archive_type.merge_software_type.merge_software_selected == "flash" | 59 #if $sequencer_type.input_type.archive_type.merge_software_type.merge_software_selected == "flash" |
| 55 --expected-amplicon-size $sequencer_type.input_type.archive_type.merge_software_type.expected_amplicon_size | 60 --expected-amplicon-size $sequencer_type.input_type.archive_type.merge_software_type.expected_amplicon_size |
| 56 #end if | 61 #end if |
| 57 #if $sequencer_type.input_type.archive_type.keep_unmerged | 62 #if $sequencer_type.input_type.archive_type.keep_unmerged |
| 58 --keep-unmerged | 63 --keep-unmerged |
| 59 #end if | 64 #end if |
| 60 #end if | 65 #end if |
| 61 #else | 66 #else |
| 62 #set $sep = ' ' | 67 #set $sep = ' ' |
| 63 #if $sequencer_type.sequencer_selected == "illumina" | 68 #if $sequencer_type.sequencer_selected == "illumina" |
| 64 --samples-names | 69 --samples-names |
| 65 #for $current in $sequencer_type.input_type.files_by_samples_type.samples | 70 #for $current in $sequencer_type.input_type.files_by_samples_type.samples |
| 66 $sep'${current.name.strip()}' | 71 $sep'${current.name.strip()}' |
| 67 #end for | 72 #end for |
| 68 --input-R1 | 73 --input-R1 |
| 69 #for $current in $sequencer_type.input_type.files_by_samples_type.samples | 74 #for $current in $sequencer_type.input_type.files_by_samples_type.samples |
| 70 $sep${current.R1_file} | 75 $sep'${current.R1_file}' |
| 71 #end for | 76 #end for |
| 72 #if $sequencer_type.input_type.files_by_samples_type.files_by_samples_type_selected == "already_merged" | 77 #if $sequencer_type.input_type.files_by_samples_type.files_by_samples_type_selected == "already_merged" |
| 73 --already-contiged | 78 --already-contiged |
| 74 #else | 79 #else |
| 75 --input-R2 | 80 --input-R2 |
| 76 #for $current in $sequencer_type.input_type.files_by_samples_type.samples | 81 #for $current in $sequencer_type.input_type.files_by_samples_type.samples |
| 77 $sep${current.R2_file} | 82 $sep'${current.R2_file}' |
| 78 #end for | 83 #end for |
| 79 --R1-size $sequencer_type.input_type.files_by_samples_type.R1_size --R2-size $sequencer_type.input_type.files_by_samples_type.R2_size | 84 --R1-size $sequencer_type.input_type.files_by_samples_type.R1_size --R2-size $sequencer_type.input_type.files_by_samples_type.R2_size |
| 80 --mismatch-rate $sequencer_type.input_type.files_by_samples_type.mm_rate | 85 --mismatch-rate $sequencer_type.input_type.files_by_samples_type.mm_rate |
| 81 --merge-software $sequencer_type.input_type.files_by_samples_type.merge_software_type.merge_software_selected | 86 --merge-software $sequencer_type.input_type.files_by_samples_type.merge_software_type.merge_software_selected |
| 82 #if $sequencer_type.input_type.files_by_samples_type.merge_software_type.merge_software_selected == "flash" | 87 #if $sequencer_type.input_type.files_by_samples_type.merge_software_type.merge_software_selected == "flash" |
| 83 --expected-amplicon-size $sequencer_type.input_type.files_by_samples_type.merge_software_type.expected_amplicon_size | 88 --expected-amplicon-size $sequencer_type.input_type.files_by_samples_type.merge_software_type.expected_amplicon_size |
| 84 #end if | 89 #end if |
| 85 #if $sequencer_type.input_type.files_by_samples_type.keep_unmerged | 90 #if $sequencer_type.input_type.files_by_samples_type.keep_unmerged |
| 86 --keep-unmerged | 91 --keep-unmerged |
| 87 #end if | 92 #end if |
| 88 #end if | 93 #end if |
| 89 #else | 94 #else |
| 90 --input-R1 | 95 --input-R1 |
| 91 #for $current in $sequencer_type.input_type.samples | 96 #for $current in $sequencer_type.input_type.samples |
| 92 $sep${current.R1_file} | 97 $sep'${current.R1_file}' |
| 93 #end for | 98 #end for |
| 94 --samples-names | 99 --samples-names |
| 95 #for $current in $sequencer_type.input_type.samples | 100 #for $current in $sequencer_type.input_type.samples |
| 96 $sep'${current.name.strip()}' | 101 $sep'${current.name.strip()}' |
| 97 #end for | 102 #end for |
| 98 #end if | 103 #end if |
| 99 #end if | 104 #end if |
| 100 </command> | 105 </command> |
| 121 <!-- $sequencer_type.input_type.archive_type.archive_type_selected == "already_merged" --> | 126 <!-- $sequencer_type.input_type.archive_type.archive_type_selected == "already_merged" --> |
| 122 <when value="paired"> | 127 <when value="paired"> |
| 123 <!-- Reads size --> | 128 <!-- Reads size --> |
| 124 <param name="R1_size" type="integer" label="Reads 1 size" help="The maximum read1 size." value="" optional="false" /> | 129 <param name="R1_size" type="integer" label="Reads 1 size" help="The maximum read1 size." value="" optional="false" /> |
| 125 <param name="R2_size" type="integer" label="Reads 2 size" help="The maximum read2 size." value="" optional="false" /> | 130 <param name="R2_size" type="integer" label="Reads 2 size" help="The maximum read2 size." value="" optional="false" /> |
| 126 <param name="mm_rate" type="float" label="Mismatch rate." help="The maximum rate of mismatch in the overlap region" value="0.1" optional="false" /> | 131 <param name="mm_rate" type="float" label="Mismatch rate." help="The maximum rate of mismatch in the overlap region" value="0.1" optional="false" /> |
| 127 <conditional name="merge_software_type"> | 132 <conditional name="merge_software_type"> |
| 128 <param name="merge_software_selected" type="select" label="Merge software" help="Select the software to merge paired-end reads."> | 133 <param name="merge_software_selected" type="select" label="Merge software" help="Select the software to merge paired-end reads."> |
| 129 <option value="vsearch" selected="true">Vsearch</option> | 134 <option value="vsearch" selected="true">Vsearch</option> |
| 130 <option value="flash">Flash</option> | 135 <option value="flash">Flash</option> |
| 131 </param> | 136 </param> |
| 199 </param> | 204 </param> |
| 200 </when> | 205 </when> |
| 201 <when value="without_primers"></when> | 206 <when value="without_primers"></when> |
| 202 </conditional> | 207 </conditional> |
| 203 </when> | 208 </when> |
| 204 | 209 |
| 205 <when value="454"> | 210 <when value="454"> |
| 206 <!-- Samples --> | 211 <!-- Samples --> |
| 207 <conditional name="input_type"> | 212 <conditional name="input_type"> |
| 208 <param name="input_type_selected" type="select" label="Input type" help="Samples files can be provided in single archive or with one file by sample."> | 213 <param name="input_type_selected" type="select" label="Input type" help="Samples files can be provided in single archive or with one file by sample."> |
| 209 <option value="files_by_samples" selected="true">One file by sample</option> | 214 <option value="files_by_samples" selected="true">One file by sample</option> |
| 294 </conditional> | 299 </conditional> |
| 295 </conditional> | 300 </conditional> |
| 296 <output name="dereplicated_file" file="references/01-prepro-vsearch.fasta" compare="diff" lines_diff="0" /> | 301 <output name="dereplicated_file" file="references/01-prepro-vsearch.fasta" compare="diff" lines_diff="0" /> |
| 297 <output name="count_file" file="references/01-prepro-vsearch.tsv" compare="diff" lines_diff="0" /> | 302 <output name="count_file" file="references/01-prepro-vsearch.tsv" compare="diff" lines_diff="0" /> |
| 298 <output name="summary_file" file="references/01-prepro-vsearch.html" compare="sim_size" delta="0"/> | 303 <output name="summary_file" file="references/01-prepro-vsearch.html" compare="sim_size" delta="0"/> |
| 299 </test> | 304 </test> |
| 300 </tests> | 305 </tests> |
| 301 <help> | 306 <help> |
| 302 | 307 |
| 303 .. image:: static/images/FROGS_logo.png | 308 @HELP_LOGO@ |
| 304 :height: 144 | |
| 305 :width: 110 | |
| 306 | |
| 307 | 309 |
| 308 .. class:: infomark page-header h2 | 310 .. class:: infomark page-header h2 |
| 309 | 311 |
| 310 What it does | 312 What it does |
| 311 | 313 |
| 359 This file contains the count of all unique sequences in each sample (format `TSV <https://en.wikipedia.org/wiki/Tab-separated_values>`_). | 361 This file contains the count of all unique sequences in each sample (format `TSV <https://en.wikipedia.org/wiki/Tab-separated_values>`_). |
| 360 | 362 |
| 361 **Summary file** (report.html): | 363 **Summary file** (report.html): |
| 362 | 364 |
| 363 This file reports the number of remaining sequences after each filter (format `HTML <https://en.wikipedia.org/wiki/HTML>`_). Depending on the tool configuration, there will be more or fewer filtering steps and therefore more or fewer bars in the barplot. | 365 This file reports the number of remaining sequences after each filter (format `HTML <https://en.wikipedia.org/wiki/HTML>`_). Depending on the tool configuration, there will be more or fewer filtering steps and therefore more or fewer bars in the barplot. |
| 364 | 366 |
| 365 .. image:: static/images/FROGS_preprocess_summary_v3.png | 367 .. image:: static/images/FROGS_preprocess_summary_v3.png |
| 366 :height: 850 | 368 :height: 850 |
| 367 :width: 831 | 369 :width: 831 |
| 368 | 370 |
| 369 It also presents the length distribution of the full amplicon sequences after merging step and after filtering steps. | 371 It also presents the length distribution of the full amplicon sequences after merging step and after filtering steps. |
| 370 | 372 |
| 371 .. image:: static/images/FROGS_preprocess_lengthsSamples_v3.png | 373 .. image:: static/images/FROGS_preprocess_lengthsSamples_v3.png |
| 372 :height: 379 | 374 :height: 379 |
| 373 :width: 364 | 375 :width: 364 |
| 374 | 376 |
| 375 .. class:: infomark page-header h2 | 377 .. class:: infomark page-header h2 |
| 376 | 378 |
| 377 How it works | 379 How it works |
| 378 | 380 |
| 379 .. csv-table:: | 381 .. csv-table:: |
| 380 :header: "Steps", "Illumina", "454" | 382 :header: "Steps", "Illumina", "454" |
| 381 :widths: 5, 150, 150 | 383 :widths: 5, 150, 150 |
| 382 :class: table table-striped | 384 :class: table table-striped |
| 383 | 385 |
| 384 "1", "For un-merged data: Merges R1 and R2 with a maximum of M% mismatch in the overlapped region (`VSEARCH <https://github.com/torognes/vsearch/>`_ or `FLASH <https://ccb.jhu.edu/software/FLASH/>`_ or optionally `PEAR <https://sco.h-its.org/exelixis/web/software/pear/>`_) with a minimum of 10 bp in the overlap region. Resulting un-merged reads may optionally be artificially combined by adding 100 N between the reads", "/" | 386 "1", "For un-merged data: Merges R1 and R2 with a maximum of M% mismatch in the overlapped region (`VSEARCH <https://github.com/torognes/vsearch/>`_ or `FLASH <http://ccb.jhu.edu/software/FLASH/>`_ or optionally `PEAR <https://sco.h-its.org/exelixis/web/software/pear/>`_) with a minimum of 10 bp in the overlap region. Resulting un-merged reads may optionally be artificially combined by adding 100 N between the reads", "/" |
| 385 "2", "If sequencing protocol is the illumina standard protocol : Removes sequences where the two primers are not present and removes primers in the remaining sequence (`cutadapt <http://cutadapt.readthedocs.org/en/latest/guide.html>`_). The primer search accepts 10% of differences", "Removes sequences where the two primers are not present, removes primers sequence from amplicon sequence and reverse complement the sequences on strand - (`cutadapt <http://cutadapt.readthedocs.org/en/latest/guide.html>`_). The primer search accepts 10% of differences" | 387 "2", "If sequencing protocol is the illumina standard protocol : Removes sequences where the two primers are not present and removes primers in the remaining sequence (`cutadapt <http://cutadapt.readthedocs.org/en/latest/guide.html>`_). The primer search accepts 10% of differences", "Removes sequences where the two primers are not present, removes primers sequence from amplicon sequence and reverse complement the sequences on strand - (`cutadapt <http://cutadapt.readthedocs.org/en/latest/guide.html>`_). The primer search accepts 10% of differences" |
| 386 "3", "Filters sequences with ambiguous nucleotides and for merged sequences filters on their length which must range between 'Minimum amplicon size - primer length' and 'Maximum amplicon size - primer length'", "Removes sequences with at least one homopolymer with more than seven nucleotides and with a distance of less than or equal to 10 nucleotides between two poor quality positions, i.e. with a Phred quality score lower than 10" | 388 "3", "Filters sequences with ambiguous nucleotides and for merged sequences filters on their length which must range between 'Minimum amplicon size - primer length' and 'Maximum amplicon size - primer length'", "Removes sequences with at least one homopolymer with more than seven nucleotides and with a distance of less than or equal to 10 nucleotides between two poor quality positions, i.e. with a Phred quality score lower than 10" |
| 387 "4", "Dereplicates sequences", "Dereplicates sequences" | 389 "4", "Dereplicates sequences", "Dereplicates sequences" |
| 388 | 390 |
| 389 | 391 |
| 390 .. class:: infomark page-header h2 | 392 .. class:: infomark page-header h2 |
| 391 | 393 |
| 433 | 435 |
| 434 Primers parameters | 436 Primers parameters |
| 435 | 437 |
| 436 The (`Kozich et al. 2013 <http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3753973/>`_ ) protocol uses custom sequencing primers which are also the PCR primers. In this case the reads do not contain the PCR primers. | 438 The (`Kozich et al. 2013 <http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3753973/>`_ ) protocol uses custom sequencing primers which are also the PCR primers. In this case the reads do not contain the PCR primers. |
| 437 | 439 |
| 438 In case of Illumina standard protocol, the primers must be provided in 5' to 3' orientation. | 440 In case of Illumina standard protocol, the primers must be provided in 5' to 3' orientation. |
| 439 | 441 |
| 440 .. role:: alert-info | 442 .. role:: alert-info |
| 441 | 443 |
| 442 Example: | 444 Example: |
| 443 | 445 |
| 444 5' :alert-info:`ATGCCC` GTCGTCGTAAAATGC :alert-info:`ATTTCAG` 3' | 446 5' :alert-info:`ATGCCC` GTCGTCGTAAAATGC :alert-info:`ATTTCAG` 3' |
| 445 | 447 |
| 446 Value for parameter 5' primer: ATGCCC | 448 Value for parameter 5' primer: ATGCCC |
| 447 | 449 |
| 448 Value for parameter 3' primer: ATTTCAG | 450 Value for parameter 3' primer: ATTTCAG |
| 449 | 451 |
| 450 .. class:: h3 | 452 .. class:: h3 |
| 451 | 453 |
| 452 FLASH : Amplicons sizes parameters | 454 FLASH : Amplicons sizes parameters |
| 458 The two following images show two examples of perfect values for the size parameters. | 460 The two following images show two examples of perfect values for the size parameters. |
| 459 | 461 |
| 460 .. image:: static/images/FROGS_preprocess_ampliconSize_unimodal_v3.png | 462 .. image:: static/images/FROGS_preprocess_ampliconSize_unimodal_v3.png |
| 461 :height: 415 | 463 :height: 415 |
| 462 :width: 676 | 464 :width: 676 |
| 463 | 465 |
| 464 .. image:: static/images/FROGS_preprocess_ampliconSize_multimodal_v3.png | 466 .. image:: static/images/FROGS_preprocess_ampliconSize_multimodal_v3.png |
| 465 :height: 415 | 467 :height: 415 |
| 466 :width: 676 | 468 :width: 676 |
| 467 | 469 |
| 468 Don't worry: the "Expected amplicon size" does not need to be very accurate, and it is only necessary when merging sequences with FLASH. | 470 Don't worry: the "Expected amplicon size" does not need to be very accurate, and it is only necessary when merging sequences with FLASH. |
| 470 .. class:: h3 | 472 .. class:: h3 |
| 471 | 473 |
| 472 If the 'merged' filter drastically reduces the number of sequences: | 474 If the 'merged' filter drastically reduces the number of sequences: |
| 473 | 475 |
| 474 In un-merged Illumina data, with a targeted amplicon size in the range of R1+R2-10, the reduction of the dataset by the merged filter is typically less than 20%. A loss of more than 20% in all samples can highlight a quality problem. | 476 In un-merged Illumina data, with a targeted amplicon size in the range of R1+R2-10, the reduction of the dataset by the merged filter is typically less than 20%. A loss of more than 20% in all samples can highlight a quality problem. |
| 475 | 477 |
| 476 If the overlap between R1 and R2 is greater than 50 nucleotides and the quality of the end of the sequences is poor (see `FastQC <http://www.bioinformatics.babraham.ac.uk/projects/fastqc/>`_) you can try to cut the end of your sequences and relaunch the preprocess tool. You can also raise the mismatch percent in the overlapped region, but not too much! | 478 If the overlap between R1 and R2 is greater than 50 nucleotides and the quality of the end of the sequences is poor (see `FastQC <http://www.bioinformatics.babraham.ac.uk/projects/fastqc/>`_) you can try to cut the end of your sequences and relaunch the preprocess tool. You can also raise the mismatch percent in the overlapped region, but not too much! |
| 477 | 479 |
| 478 ---- | 480 |
| 479 | 481 @HELP_CONTACT@ |
| 480 **Contact** | |
| 481 | |
| 482 Contacts: frogs-support@inrae.fr | |
| 483 | |
| 484 Repositories: https://github.com/geraldinepascal/FROGS, https://github.com/geraldinepascal/FROGS-wrappers | |
| 485 | |
| 486 Website: http://frogs.toulouse.inrae.fr/ | |
| 487 | |
| 488 Please cite the **FROGS article**: `Escudie F., et al. Bioinformatics, 2018. FROGS: Find, Rapidly, OTUs with Galaxy Solution. <https://doi.org/10.1093/bioinformatics/btx791>`_ | |
| 489 | 482 |
| 490 </help> | 483 </help> |
| 484 | |
| 485 <citations> | |
| 486 <expand macro="citations" /> | |
| 487 </citations> | |
| 488 | |
| 491 </tool> | 489 </tool> |
