Mercurial > repos > nml > cryptogenotyper
changeset 0:c64e19bda2a6 draft default tip
planemo upload for repository https://github.com/phac-nml/CryptoGenotyper commit fdca1f95a5d09edf00bddd42286b68fcb20fa981
author | nml |
---|---|
date | Fri, 12 Sep 2025 18:50:28 +0000 |
parents | |
children | |
files | README.md cryptogenotyper.xml images/interface_pic.png test-data/P17705_Crypto16-2F-20170927_SSUF_G12_084.ab1 test-data/P17705_Crypto16-2R-20170927_SSUR_H12_082.ab1 test-data/P17705_gp60-Crypt14-1F-20170927_gp60F_G07_051.ab1 test-data/P17705_gp60-Crypt14-1R-20170927_gp60R_H07_049.ab1 test-data/test_illumina_18S_F.fasta test-data/test_illumina_18S_R.fasta test-data/test_illumina_gp60_F1.fasta test-data/test_illumina_gp60_R1.fasta |
diffstat | 11 files changed, 366 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Fri Sep 12 18:50:28 2025 +0000 @@ -0,0 +1,6 @@ +# CryptoGenotyper +This code allows running `CryptoGenotyper` ([https://github.com/phac-nml/CryptoGenotyper](https://github.com/phac-nml/CryptoGenotyper)) on a Galaxy Project server with a convenient interface. + +This wrapper provides access to the complete functionality of the command-line tool, allowing it to be incorporated into any workflow. + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cryptogenotyper.xml Fri Sep 12 18:50:28 2025 +0000
@@ -0,0 +1,335 @@
+<tool id="CryptoGenotyper" name="CryptoGenotyper" version="@VERSION@+galaxy0">
+    <!-- Galaxy wrapper for the cryptogenotyper command line tool: one job processes either a single
+         forward or reverse read file, or one forward/reverse pair in contig mode. -->
+    <description>classifies Cryptosporidium species subtypes based on SSU rRNA and gp60 gene markers from Sanger sequencing data.</description>
+    <macros>
+        <!-- Shared by the tool version string and the package requirement. -->
+        <token name="@VERSION@">1.5.0</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@VERSION@">cryptogenotyper</requirement>
+    </requirements>
+    <version_command>cryptogenotyper --version</version_command>
+
+    <command detect_errors="exit_code"><![CDATA[
+        #import re
+        ## Dataset names are user supplied. Before a name is used as a link name, every
+        ## character other than word characters, '-', '.' and space is replaced by '_' so
+        ## that it cannot end the single-quoted shell arguments or be read as a path.
+
+        #set $ref_file = ''
+        #if $db
+            ## Link the optional custom reference under its sanitised dataset name.
+            #set $ref_file = re.sub('[^\w\-\. ]', '_', str($db.name))
+            ln -sf '${db}' '${ref_file}' &&
+        #end if
+
+        #if $primers.seqtype == 'contig'
+            ## Contig mode: both reads are linked into the working directory with the tags
+            ## 'forward' / 'reverse' in their names; the same tags are passed via -f / -r.
+            #set $forward_name = re.sub('[^\w\-\. ]', '_', str($primers.input.forward.name).rsplit('.', 1)[0])
+            #set $reverse_name = re.sub('[^\w\-\. ]', '_', str($primers.input.reverse.name).rsplit('.', 1)[0])
+            ln -sf '${primers.input.forward}' '${forward_name}_forward.${primers.input.forward.ext}' &&
+            ln -sf '${primers.input.reverse}' '${reverse_name}_reverse.${primers.input.reverse.ext}' &&
+            cryptogenotyper -i '.' -m '$marker' -t '$primers.seqtype' -f 'forward' -r 'reverse'
+        #else
+            ## Single-file mode: the link is named with the Galaxy datatype extension, which
+            ## may differ from the extension in the dataset name, so -i points at the link.
+            #set $filename_no_ext = re.sub('[^\w\-\. ]', '_', str($primers.input.name).rsplit('.', 1)[0])
+            #set $input_link = $filename_no_ext + '.' + str($primers.input.ext)
+            ln -sf '${primers.input}' '${input_link}' &&
+            cryptogenotyper -i './${input_link}' -m '$marker' -t '$primers.seqtype'
+        #end if
+        #if $db
+            --databasefile '${ref_file}'
+        #end if
+        $outputheader
+        -o 'result'
+    ]]></command>
+
+    <inputs>
+        <param name="marker" type="select" label="Marker">
+            <option value="18S">SSU rRNA (18S)</option>
+            <option value="gp60">gp60</option>
+        </param>
+        <param name="db" type="data" optional="true" format="fasta" label="Reference Database File (optional):"/>
+        <!-- The selected sequence type decides whether one dataset or a paired collection is requested. -->
+        <conditional name="primers">
+            <param name="seqtype" type="select" label="Type of Sequences">
+                <option value="forward" selected="true">Forward Only</option>
+                <option value="reverse">Reverse Only</option>
+                <option value="contig">Contig</option>
+            </param>
+            <when value="contig">
+                <param name="input" type="data_collection" collection_type="paired" format="ab1,fasta" label="Paired Sequencing File(s)"/>
+            </when>
+            <when value="forward">
+                <param name="input" type="data" format="ab1,fasta" label="Forward Sequencing File(s)"/>
+            </when>
+            <when value="reverse">
+                <param name="input" type="data" format="ab1,fasta" label="Reverse Sequencing File(s)"/>
+            </when>
+        </conditional>
+        <!-- Not passed to the command line; it is only read by the log output filters. -->
+        <param name="show_log" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Output run log?"/>
+        <!-- Checked adds nothing to the command line; unchecked adds the noheaderline flag. -->
+        <param name="outputheader" type="boolean" truevalue="" falsevalue="--noheaderline" checked="true" label="Output header line in the report?"/>
+    </inputs>
+
+    <outputs>
+        <!-- Every result file is declared once per input mode; the filters keep exactly one of each
+             pair. The single-file variants put the input dataset name into the label. -->
+        <data name="outfile_single" format="fasta" from_work_dir="result_cryptogenotyper_report.fa"
+              label="${tool.name} ${marker} extracted fasta from ${primers.input.name.split('.')[0]}">
+            <filter>primers['seqtype'] != 'contig'</filter>
+        </data>
+        <data name="outfile_collection" format="fasta" from_work_dir="result_cryptogenotyper_report.fa"
+              label="${tool.name} ${marker} extracted fastas">
+            <filter>primers['seqtype'] == 'contig'</filter>
+        </data>
+
+        <!-- Tabular report -->
+        <data name="outfile_report_single" format="tabular" from_work_dir="result_cryptogenotyper_report.txt"
+              label="${tool.name} ${marker} REPORT on ${primers.input.name.split('.')[0]}">
+            <filter>primers['seqtype'] != 'contig'</filter>
+        </data>
+        <data name="outfile_report_collection" format="tabular" from_work_dir="result_cryptogenotyper_report.txt"
+              label="${tool.name} ${marker} REPORTS">
+            <filter>primers['seqtype'] == 'contig'</filter>
+        </data>
+
+        <!-- Run log, offered only when show_log is checked -->
+        <data name="outfile_log_single" format="txt" from_work_dir="cryptogenotyper.log"
+              label="${tool.name} Run ${marker} log on ${primers.input.name.split('.')[0]}">
+            <filter>show_log and primers['seqtype'] != 'contig'</filter>
+        </data>
+        <data name="outfile_log_collection" format="txt" from_work_dir="cryptogenotyper.log"
+              label="${tool.name} ${marker} logs">
+            <filter>show_log and primers['seqtype'] == 'contig'</filter>
+        </data>
+    </outputs>
+
+    <tests>
+        <!-- Tests 1-4 use AB1 trace files; tests 5-10 use multi-record FASTA files. -->
+        <!-- TEST 1: 18S, forward AB1 -->
+        <test expect_num_outputs="2">
+            <param name="marker" value="18S"/>
+            <conditional name="primers">
+                <param name="seqtype" value="forward"/>
+                <param name="input" value="P17705_Crypto16-2F-20170927_SSUF_G12_084.ab1"/>
+            </conditional>
+            <output name="outfile_report_single" ftype="tabular">
+                <assert_contents>
+                    <has_text_matching expression="C.parvum"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- TEST 2: gp60, forward AB1 -->
+        <test expect_num_outputs="2">
+            <param name="marker" value="gp60"/>
+            <conditional name="primers">
+                <param name="seqtype" value="forward"/>
+                <param name="input" value="P17705_gp60-Crypt14-1F-20170927_gp60F_G07_051.ab1"/>
+            </conditional>
+            <output name="outfile_report_single" ftype="tabular">
+                <assert_contents>
+                    <has_text_matching expression="C.parvum"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- TEST 3: paired gp60 contig test -->
+        <test expect_num_outputs="2">
+            <param name="marker" value="gp60"/>
+            <conditional name="primers">
+                <param name="seqtype" value="contig"/>
+                <param name="input">
+                    <collection type="paired">
+                        <element name="forward" value="P17705_gp60-Crypt14-1F-20170927_gp60F_G07_051.ab1"/>
+                        <element name="reverse" value="P17705_gp60-Crypt14-1R-20170927_gp60R_H07_049.ab1"/>
+                    </collection>
+                </param>
+            </conditional>
+            <output name="outfile_report_collection" ftype="tabular">
+                <assert_contents>
+                    <has_text_matching expression="C.parvum"/>
+                    <has_text_matching expression="IIaA15G2R1"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- TEST 4: paired 18S contig test -->
+        <test expect_num_outputs="2">
+            <param name="marker" value="18S"/>
+            <conditional name="primers">
+                <param name="seqtype" value="contig"/>
+                <param name="input">
+                    <collection type="paired">
+                        <element name="forward" value="P17705_Crypto16-2F-20170927_SSUF_G12_084.ab1"/>
+                        <element name="reverse" value="P17705_Crypto16-2R-20170927_SSUR_H12_082.ab1"/>
+                    </collection>
+                </param>
+            </conditional>
+            <output name="outfile_report_collection" ftype="tabular">
+                <assert_contents>
+                    <has_text_matching expression="C.parvum"/>
+                    <has_text_matching expression="Check for C. parvum TGA paralogs"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- TEST 5: 18S multi-fasta forward -->
+        <test expect_num_outputs="2">
+            <param name="marker" value="18S"/>
+            <conditional name="primers">
+                <param name="seqtype" value="forward"/>
+                <param name="input" value="test_illumina_18S_F.fasta"/>
+            </conditional>
+            <output name="outfile_report_single" ftype="tabular">
+                <assert_contents>
+                    <has_n_lines n="5"/>
+                    <has_text_matching expression="C.hominis"/>
+                    <has_text_matching expression="forward"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- TEST 6: 18S multi-fasta reverse -->
+        <test expect_num_outputs="2">
+            <param name="marker" value="18S"/>
+            <conditional name="primers">
+                <param name="seqtype" value="reverse"/>
+                <param name="input" value="test_illumina_18S_R.fasta"/>
+            </conditional>
+            <output name="outfile_report_single" ftype="tabular">
+                <assert_contents>
+                    <has_n_lines n="5"/>
+                    <has_text_matching expression="C.hominis"/>
+                    <has_text_matching expression="reverse"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- TEST 7: 18S multi-fasta contig -->
+        <test expect_num_outputs="2">
+            <param name="marker" value="18S"/>
+            <conditional name="primers">
+                <param name="seqtype" value="contig"/>
+                <param name="input">
+                    <collection type="paired">
+                        <element name="forward" value="test_illumina_18S_F.fasta"/>
+                        <element name="reverse" value="test_illumina_18S_R.fasta"/>
+                    </collection>
+                </param>
+            </conditional>
+            <output name="outfile_report_collection" ftype="tabular">
+                <assert_contents>
+                    <has_text_matching expression="C.hominis"/>
+                    <has_text_matching expression="contig"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- TEST 8: gp60 multi-fasta forward -->
+        <test expect_num_outputs="2">
+            <param name="marker" value="gp60"/>
+            <conditional name="primers">
+                <param name="seqtype" value="forward"/>
+                <param name="input" value="test_illumina_gp60_F1.fasta"/>
+            </conditional>
+            <output name="outfile_report_single" ftype="tabular">
+                <assert_contents>
+                    <has_n_lines n="3"/>
+                    <has_text_matching expression="C.parvum"/>
+                    <has_text_matching expression="forward"/>
+                    <has_text_matching expression="IIaA16G3R1"/>
+                    <has_text_matching expression="IIaA15G2R2"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- TEST 9: gp60 multi-fasta reverse -->
+        <test expect_num_outputs="2">
+            <param name="marker" value="gp60"/>
+            <conditional name="primers">
+                <param name="seqtype" value="reverse"/>
+                <param name="input" value="test_illumina_gp60_R1.fasta"/>
+            </conditional>
+            <output name="outfile_report_single" ftype="tabular">
+                <assert_contents>
+                    <has_n_lines n="3"/>
+                    <has_text_matching expression="C.parvum"/>
+                    <has_text_matching expression="reverse"/>
+                    <has_text_matching expression="IIaA3R1"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- TEST 10: gp60 multi-fasta contig with the run log requested (third output) -->
+        <test expect_num_outputs="3">
+            <param name="marker" value="gp60"/>
+            <param name="show_log" value="true"/>
+            <conditional name="primers">
+                <param name="seqtype" value="contig"/>
+                <param name="input">
+                    <collection type="paired">
+                        <element name="forward" value="test_illumina_gp60_F1.fasta"/>
+                        <element name="reverse" value="test_illumina_gp60_R1.fasta"/>
+                    </collection>
+                </param>
+            </conditional>
+            <output name="outfile_report_collection" ftype="tabular">
+                <assert_contents>
+                    <has_text_matching expression="C.parvum"/>
+                    <has_text_matching expression="contig"/>
+                    <has_text_matching expression="IIaA16G3R1"/>
+                    <has_text_matching expression="IIaA15G2R2"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help>
+
+**Syntax**
+
+CryptoGenotyper is a standalone tool to *in-silico* determine species and subtype based on SSU rRNA (18S) and gp60 markers.
+
+**❗ Important:** To process **multiple input files** and generate **a single** combined report, please import and use the workflows available `here`_.
+
+.. _here: https://github.com/phac-nml/CryptoGenotyper/tree/main/CryptoGenotyper/GalaxyWorkflows
+
+
+For a tutorial on how to use CryptoGenotyper, please refer to the `official tutorial`_.
+
+For more information, please visit https://github.com/phac-nml/CryptoGenotyper.
+
+.. _official tutorial: https://github.com/phac-nml/CryptoGenotyper/blob/docs/docs/CryptoGenotyperTutorial-CrownCopyright.pdf
+
+-----
+
+**Input:**
+
+AB1 or FASTA file(s) representing Cryptosporidium's SSU rRNA (18S) or gp60 locus can be provided as single-end reads (either forward or reverse only) or as paired-end reads in contig mode (both a forward and a reverse read for each sample).
+
+Optional: A custom reference database of SSU rRNA or gp60 locus in .fasta file format, to be used during the homology search for *Cryptosporidium* classification.
+
+
+**Output:**
+
+A tabular report and a FASTA file containing the identification of the *Cryptosporidium* species/subtype and its corresponding sequence for each sample along with other relevant details.
+The gp60 subtyping is based on the `Deciphering a cryptic minefield: a guide to Cryptosporidium gp60 subtyping publication`_.
+
+.. _`Deciphering a cryptic minefield: a guide to Cryptosporidium gp60 subtyping publication`: https://doi.org/10.1016/j.crpvbd.2025.100257
+
+    </help>
+
+    <citations>
+        <!-- CryptoGenotyper: a new bioinformatics tool to enhance Cryptosporidium identification (Food and Waterborne Parasitology, 2021) -->
+        <citation type="doi">10.1016/j.fawpar.2021.e00115</citation>
+        <!-- Deciphering a cryptic minefield: A guide to Cryptosporidium gp60 subtyping (Current Research in Parasitology and Vector-Borne Diseases, 2025) -->
+        <citation type="doi">10.1016/j.crpvbd.2025.100257</citation>
+    </citations>
+
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_illumina_18S_F.fasta Fri Sep 12 18:50:28 2025 +0000 @@ -0,0 +1,8 @@ +>M04527:274:000000000-DBMY7:1:1102:16042:18930 1:N:0:20 +AGTGACAAGAAATAACAATACAGGACTTTTTGGTTTTGTAATTGGAATGAGTTAAGTATAAACCCCTTTACAAGTATCAATTGGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCTGTTAATAATTTATATAAAATATTTTGATGAATATTTATATAATATTAACATAATTCATATTACTATTTTTTTTTT +>M04527:274:000000000-DBMY7:1:1101:20679:16373 1:N:0:20 +AGTGACAAGAAATAACAATACAGGACTTTTTGGTTTTGTAATTGGAATGAGTTAAGTATAAACCCCTTTACAAGTATCAATTGGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCTGTTAATAATTTATATAAAATATTTTGATGAATATTTATATAATATTAACATAATTCATATTACTATTTTTTTTTT +>M04527:274:000000000-DBMY7:1:1102:12557:11653 1:N:0:20 +AGTGACAAGAAATAACAATACAGGACTTTTTGGTTTTGTAATTGGAATGAGTTAAGTATAAACCCCTTTACAAGTATCAATTGGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCTGTTAATAATTTATATAAAATATTTTGATGAATATTTATATAATATTAACATAATTCATATTACTATTTTTTTTAG +>M04527:274:000000000-DBMY7:1:1102:17887:8992 1:N:0:20 +AGTGACAAGAAATAACAATACAGGACTTTTTGGTTTTGTAATTGGAATGAGTTAAGTATAAACCCCTTTACAAGTATCAATTGGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCTGTTAATAATTTATATAAAATATTTTGATGAATATTTATATAATATTAACATAATTCATATTACTATTTTTTTTTT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_illumina_18S_R.fasta Fri Sep 12 18:50:28 2025 +0000 @@ -0,0 +1,8 @@ +>M04527:274:000000000-DBMY7:1:1102:16042:18930 1:N:0:20 +AGTGACAAGAAATAACAATACAGGACTTTTTGGTTTTGTAATTGGAATGAGTTAAGTATAAACCCCTTTACAAGTATCAATTGGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCTGTTAATAATTTATATAAAATATTTTGATGAATATTTATATAATATTAACATAATTCATATTACTATTTTTTTTTT +>M04527:274:000000000-DBMY7:1:1101:20679:16373 1:N:0:20 +AGTGACAAGAAATAACAATACAGGACTTTTTGGTTTTGTAATTGGAATGAGTTAAGTATAAACCCCTTTACAAGTATCAATTGGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCTGTTAATAATTTATATAAAATATTTTGATGAATATTTATATAATATTAACATAATTCATATTACTATTTTTTTTTT +>M04527:274:000000000-DBMY7:1:1102:12557:11653 1:N:0:20 +AGTGACAAGAAATAACAATACAGGACTTTTTGGTTTTGTAATTGGAATGAGTTAAGTATAAACCCCTTTACAAGTATCAATTGGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCTGTTAATAATTTATATAAAATATTTTGATGAATATTTATATAATATTAACATAATTCATATTACTATTTTTTTTAG +>M04527:274:000000000-DBMY7:1:1102:17887:8992 1:N:0:20 +AGTGACAAGAAATAACAATACAGGACTTTTTGGTTTTGTAATTGGAATGAGTTAAGTATAAACCCCTTTACAAGTATCAATTGGAGGGCAAGTCTGGTGCCAGCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTAAAGTTGTTGCAGTTAAAAAGCTCGTAGTTGGATTTCTGTTAATAATTTATATAAAATATTTTGATGAATATTTATATAATATTAACATAATTCATATTACTATTTTTTTTTT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_illumina_gp60_F1.fasta Fri Sep 12 18:50:28 2025 +0000 @@ -0,0 +1,5 @@ +>M04527:274:000000000-DBMY7:1:1102:14324:28396 1:N:0:26 +TCCGCTGTATTCTCAGCCCCAGCCGTTCCACTCAGAGGAACTTTAAAGGATGTTCCTGTTGAGGGCTCATCATCGTCATCGTCATCGTCATCATCATCATCATCATCATCATCATCATCATCAACATCAACCGTCGCACCAGCAAATAAGGCAAGAACTGGAGAAGACGCAGAAGGCAGTCAAGATTCTAGTGGTACTGAAGCTTCTGGTAGCCAGGGTTCTGAAGAGGAAGGTAGTGAAGACGATGGCCA +>M04527:274:000000000-DBMY7:1:1102:20035:28200 1:N:0:26 +TCCGCTGTATTCTCAGCCCCTGCCGTTCCACTCAGAGGAACTTTAAAGGATGTTCCTGTTGAGGGCTCATCATCGTCATCGTCATCATCATCATCATCATCATCATCATCATCATCAACATCAACATCAACCGTCGCACCAGCAAATAAGGCAAGAACTGGAGAAGACGCAGAAGGCAGTCAAGATTCTAGTGGTACTGAAGCTTCTGGTAGCCAGGGTTCTGAAGAGGAAGGTAGTGAAGACGATGGCCA +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_illumina_gp60_R1.fasta Fri Sep 12 18:50:28 2025 +0000 @@ -0,0 +1,4 @@ +>M04527:274:000000000-DBMY7:1:1102:14324:28396 2:N:0:26 +CGAACCACATTACAAATGAAGTGCCGCATTCTTCTTTTGGAGTAGCTTCTATGGTTTCGGTAGTTGCGCCTTCACTTTGAGCTGGAGTAGTGGGTTGGGAAGCAGCACTAGTTTGGCCATCGTCTTCACTACCTTCCTCTTCAGAACCCTGGCTACCAGAAGCTTCAGTACCACTAGAATCTTGACTGCCTTCTGCGTCTTCTCCAGTTCTTGCCTTATTTGCTGGTGCGACGGTTGATGTTGATGATGAT +>M04527:274:000000000-DBMY7:1:1102:20035:28200 2:N:0:26 +CGAACCACATTACAAATGAAGTGCCGCATTCTTCTTTTGGAGTAGCTTCTATGGTTTCGGTAGTTGCGCCTTCACTTTGAGCTGGAGTAGTGGGTTGGGAAGCAGCACTAGTTTGGCCATCGTCTTCACTACCTTCCTCTTCAGAACCCTGGCTACCAGAAGCTTCAGTACCACTAGAATCTTGACTGCCTTCTGCGTCTTCTCCAGTTCTTGCCTTATTTGCTGGTGCGACGGTTGATGTTGATGTTGAT \ No newline at end of file