changeset 1:b6a241c73012 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tools/coverm commit 2388df7187533fb66b7729730340d2eb7b93c112
author iuc
date Tue, 24 Jan 2023 12:32:09 +0000
parents 5901cc218127
children e0b14520e690
files coverm_genome.xml macros.xml test-data/test4_cluster.tsv test-data/test4_rep.fa test-data/test4_rep1.fa test-data/test4_rep2.fa test-data/test4_rep3.fa test-data/test5.tsv
diffstat 7 files changed, 73 insertions(+), 28 deletions(-) [+]
line wrap: on
line diff
--- a/coverm_genome.xml	Tue Apr 26 15:25:04 2022 +0000
+++ b/coverm_genome.xml	Tue Jan 24 12:32:09 2023 +0000
@@ -1,5 +1,5 @@
-<tool id="coverm_genome" name="CoverM-GENOME" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5">
-    <description>read coverage and relative abundance calculator focused on metagenomics applications</description>
+<tool id="coverm_genome" name="CoverM genome" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Calculate coverage of individual genomes</description>
     <macros>
         <import>macros.xml</import>
     </macros>
@@ -135,7 +135,7 @@
                     ln -s '$input_file' '$file_path' &&
                     $ref_files.append($file_path)
                 #end for
-            #if $reads.genome.add_genome.add_genome
+            #if $reads.genome.add_genome.add_genome == "true"
                 mkdir -p genomes &&
                 #set genome_files = []
                 #if $reads.genome.add_genome.add_genomic == "history"
@@ -156,7 +156,8 @@
                 #end if
             #end if
         #end if
-    
+        
+        mkdir ./representative-fasta/ &&
         coverm genome
             #if $reads.read_type == 'paired'
                 -1
@@ -240,7 +241,7 @@
                         -s "$reads.genome.cond_single_genome.separator"
                     #end if
                 #end if
-                #if $reads.genome.add_genome.add_genome
+                #if $reads.genome.add_genome.add_genome == "true"
                     -f
                     #for $genome in $genome_files
                         '${genome}'
@@ -255,27 +256,27 @@
 
             $derep.dereplicate
             #if $derep.checkm_tab_table:
-                --chekm-tab-table $derep.checkm_tab_table
+                --checkm-tab-table '$derep.checkm_tab_table'
             #end if
             #if $derep.genome_info:
-                --genome-info $derep.genome_info
+                --genome-info '$derep.genome_info'
             #end if
-            #if $derep.min_completeness:
+            #if $derep.min_completeness != "":
                 --min-completeness $derep.min_completeness
             #end if
-            #if $derep.max_contamination:
+            #if $derep.max_contamination != "":
                 --max-contamination $derep.max_contamination
             #end if
-            #if $derep.dereplication_ani:
+            #if $derep.dereplication_ani != "":
                 --dereplication-ani $derep.dereplication_ani
             #end if
-            #if $derep.dereplication_aligned_fraction:
+            #if $derep.dereplication_aligned_fraction != "":
                 --dereplication-aligned-fraction $derep.dereplication_aligned_fraction
             #end if
-            #if $derep.dereplication_fragment_length:
+            #if $derep.dereplication_fragment_length != "":
                 --dereplication-fragment-length $derep.dereplication_fragment_length
             #end if
-            #if $derep.dereplication_prethreshold_ani:
+            #if $derep.dereplication_prethreshold_ani != "":
                 --dereplication-prethreshold-ani $derep.dereplication_prethreshold_ani
             #end if
             #if $derep.dereplication_quality_formula:
@@ -315,13 +316,13 @@
             --methods $cov.relative_abundance $cov.mean $cov.cond_methods.trimmed_mean $cov.covered_bases $cov.covered_fraction
             $cov.variance $cov.length $cov.count $cov.metabat $cov.coverage_histogram $cov.reads_per_base 
             $cov.rpkm $cov.tpm
-            #if $cov.min_covered_fraction:
+            #if $cov.min_covered_fraction != "":
                 --min-covered-fraction $cov.min_covered_fraction
             #end if
-            #if $cov.contig_end_exclusion:
+            #if $cov.contig_end_exclusion != "":
                 --contig-end-exclusion $cov.contig_end_exclusion
             #end if
-            #if $cov.cond_methods.trimmed_mean
+            #if $cov.cond_methods.trimmed_mean == "trimmed_mean"
                 #if $cov.cond_methods.trim_min:
                     --trim-min $cov.cond_methods.trim_min
                 #end if
@@ -334,15 +335,18 @@
                 --output-format $out.output_format
             #end if
             #if $out.dereplication_output_cluster_definition:
-                --dereplication-output-cluster-definition
+                --dereplication-output-cluster-definition '$cluster_definition'
             #end if
             #if $out.dereplication_output_representative_fasta_directory_copy:
-                --dereplication-output-representative-fasta-directory-copy .
+                --dereplication-output-representative-fasta-directory-copy ./representative-fasta/
             #end if
             $out.no_zeros
             --output-file output.tsv
             --threads \${GALAXY_SLOTS:-1}
-            2> stdout.txt
+
+        #if $derep.dereplicate and $out.dereplication_output_cluster_definition
+            && sed -i -e 's@genomes/@@g; s/\.fna//g' '$cluster_definition'
+        #end if
     ]]></command>
     <inputs>
         <expand macro="reads" />
@@ -394,12 +398,16 @@
     </inputs>
     <outputs>
         <data name="output1" format="tsv" from_work_dir="./output.tsv"/>
-        <data name="cluster-definition" format="tsv">
-            <filter>out['dereplication_output_cluster_definition']</filter>
+        <data name="cluster_definition" format="tsv" label="${tool.name} on ${on_string}: cluster definition">
+            <filter>derep['dereplicate'] and out['dereplication_output_cluster_definition']</filter>
         </data>
+        <collection name="representative_fasta" type="list" label="${tool.name} on ${on_string}: representative fasta">
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.fna" format="fasta" directory="representative-fasta" />
+            <filter>derep['dereplicate'] and out['dereplication_output_representative_fasta_directory_copy']</filter>
+        </collection>
     </outputs>
     <tests>
-        <test>
+        <test expect_num_outputs="1">
             <conditional name="reads">
                 <param name="read_type" value="paired_collection"/>
                 <param name="paired_reads">
@@ -489,7 +497,7 @@
                 </assert_contents>
             </output>
         </test>
-        <test expect_num_outputs="1">
+        <test expect_num_outputs="3">
             <conditional name="reads">
                 <param name="read_type" value="paired_collection"/>
                 <param name="paired_reads">
@@ -513,7 +521,20 @@
             <section name="cov">
                 <param name="mean" value="true"/>
             </section>
+            <section name="derep">
+                <param name="dereplicate" value="true"/>
+            </section>
+            <section name="out">
+                <param name="dereplication_output_cluster_definition" value="true"/>
+                <param name="dereplication_output_representative_fasta_directory_copy" value="true"/>
+            </section>
             <output name="output1" file="test3.tsv" ftype="tsv"/>
+            <output name="cluster_definition" ftype="tsv" value="test4_cluster.tsv"/>
+            <output_collection name="representative_fasta" type="list" count="3">
+                <element name="genome1" file="test4_rep1.fa" ftype="fasta" />
+                <element name="genome2" file="test4_rep2.fa" ftype="fasta" />
+                <element name="genome3" file="test4_rep3.fa" ftype="fasta" />
+            </output_collection>
         </test>
         <test expect_num_outputs="1">
             <conditional name="reads">
--- a/macros.xml	Tue Apr 26 15:25:04 2022 +0000
+++ b/macros.xml	Tue Jan 24 12:32:09 2023 +0000
@@ -1,12 +1,13 @@
 <macros>
     <xml name="requirements">
         <requirements>
-            <requirement type="package">coverm</requirement>
+            <requirement type="package" version="@TOOL_VERSION@">coverm</requirement>
         </requirements>
     </xml>
     <token name="@INPUT_FORMATS@">fasta,fastq,fastq.gz,fasta.gz</token>
-    <token name="@TOOL_VERSION@">0.2.1</token>
+    <token name="@TOOL_VERSION@">0.6.1</token>
     <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">22.01</token>
     <xml name="citation">
         <citations>
             <citation type="bibtex">
@@ -96,7 +97,10 @@
                     </when>
                 </conditional>                    
                 <conditional name="add_genome">
-                    <param name="add_genome" type="boolean" label="Add additional Genome Files"/>
+                    <param name="add_genome" type="select" label="Add additional Genome Files">
+                        <option value="true">Yes</option>
+                        <option value="false" selected="true">No</option>
+                    </param>
                     <when value="true">
                         <conditional name="add_genomic">
                             <param type="select" label="Reference genome source" name="source">
@@ -186,7 +190,7 @@
     <xml name="add_reads">
         <section name="add_reads" title="Add an additional read">
             <conditional name="extra_read">
-                <param type="select" label="Read type" optional="true" name="read_type">
+                <param type="select" label="Read type" name="read_type">
                     <option value="none" selected="true">None</option>
                     <option value="paired">Paired end</option>
                     <option value="paired_collection">Paired collection</option>
@@ -251,7 +255,10 @@
             <param name="relative_abundance" type="boolean" falsevalue="" truevalue="relative_abundance" label="Relative abundance (default)"/>
             <param name="mean" type="boolean" falsevalue="" truevalue="mean" label="Mean"/>
             <conditional name="cond_methods">
-                <param name="trimmed_mean" type="boolean" falsevalue="" truevalue="trimmed_mean" label="Trimmed mean"/>
+                <param name="trimmed_mean" type="select" label="Trimmed mean">
+                    <option value="trimmed_mean">Yes</option>
+                    <option value="" selected="true">No</option>
+                </param>
                 <when value="trimmed_mean">
                     <param name="trim_min" type="integer" min="0" value="5" label="Trim min" help="Remove this smallest fraction of positions when calculating trimmed_mean default: 5"/>
                     <param name="trim_max" type="integer" min="0" value="95" label="Trim max" help="Maximum fraction for trimmed_mean calculations default: 95"/>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test4_cluster.tsv	Tue Jan 24 12:32:09 2023 +0000
@@ -0,0 +1,3 @@
+genome1	genome1
+genome2	genome2
+genome3	genome3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test4_rep1.fa	Tue Jan 24 12:32:09 2023 +0000
@@ -0,0 +1,4 @@
+>random_sequence_length_500_1
+GAACATGGGGGGTTGTACACCGCTGGCAGGATATAAAAGAGCGAATTCTTCGTGGTGGGATGCAGAATGTTCATGCACCGCAAACCTTAGGCCCTACCCAATAACAGTCAGCTCCTCCCCTAGAAATACCGTGTCTACGTGTATGTAATGTCAAAACCGCGCTGGTCCATAACTGACCACGACATACGCTTAAGCGACGCAGGTCCCCGTTTATGGACGCAGTAAGATGATATGACAGTAACAGAACGAACGCCTTGATAGCCACTTGGCCTGGTGCCAACGGGGAACGACCTATAGGGGCCAAATTGATTCCAGCTGGTCGGGTGCGCGTATCATCCAATCCAGCCAGGCAGCAACCTCGTATAAGCATCCACACAGCGAGATTTCCAGGACCTTTGAGACGCATCTAGCAGTTTTCCCCCCTTCCACTGACACGTAGTTGATCAAATATCTCGTGTCCACCGGAATCCTCAGAGATTCTCTTCTGTCAAAAAACACTC
+>random_sequence_length_500_2
+CGCTCATGTGCCAAGCATGGACGCCGGAGACACATTTGCTAGTCGGGATGTACGCGGTCTTGTCCACTCTAAAGGGTAAATACAGGAAGCGGTCTCTAGGGACGGGCACTTAATGTAGTTGCATTTATCGATCGCGGGTCCAGATCTGATAGCCGTGCTATGCACATACGATGCTCATAACGCGCGTGATTCCCTTCATTAAGGTACGGCTACCCTGCACCTCTAGACTTGTTTGTACTATCTAGAGCAGTGAGTCGAGCGTCGACAATAGGGTCAGGGCCGGGTACTATTTTGAGCTTTACGGTAATTGCCTTGGGCTAGTAAAACCGCTTCTATACCGCAGACTCATCGAATAATGATTCCTGCAAAACGAGCACGATACGGGAGTTCCTTATGACCAGTCTAATGGTCTCAAGGGTCTCCCCCGGGGAATTACGGTGCCAAGAATGTCTCTGTAATTGACCTAAGTTGTCTGTCGATCCAGAAAATCTGACCAAGAT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test4_rep2.fa	Tue Jan 24 12:32:09 2023 +0000
@@ -0,0 +1,4 @@
+>random_sequence_length_500_1
+TCATGAACGGTAGTCAGTCAAGTTTCGCCAGCAATAGGCACAGCTTGTATGGAGGGGTTTAGTCGCCGGAGTGTTACGAGTTTGACGATCCAAAGTTGGAGTCGCCGTGATAATTAATGATATTGCGTCTTCGGTGGGCACGATGAGCCAATCTGCGTCGCACTAGTTTACGTGTTAACGTAGTGAACCTAGACATGACGGGACTATGGCAGTAAGCCGTCACAGCATGTCGGTTCCGGACCACGAGGGTCCATAAGACTAACGATCCACGCTAATTGGCGGATTTACCAAGGCGTCTTCTCTGCGTGTAAGACCCACACCGTGCCTCTGTACCTATGGACGTGCGCGACCATCTTATGATATGAGCATCACATGTTTAATAGAGCGTCCGGTGTCCCTGTAATGCCACCACTTCTGTTTCAACGGACGCTGAATCTAAGCCGATATTTTTGCCTTGATCAGAACGGGGCAGTGAAGGAGCTGCACTAAACTGGCCCTTA
+>random_sequence_length_500_2
+GGATCGGATTACTAATGTTTTTCCAGTCGCCTGTTACTCCAGGCGCGGATGCGAGTGGTCTTTGCCAAGTAGCCCCATGCCACCTGTACAAGGCGGACAAACGCGGATGCGTTTGCATAAATTACCACCGTAAGATACGTTGGCGTATTGCCGTACAAAGGCGGACAGTCCAGATATTTCTTCGGATGTAATTATTGTAGGCGTGCACAACCTACTAGGCGAAAGAGCGGCCCGTTAATCGCGGGTCGCGCCGTGCCCAATTGAACACTTTGCCACGCACGTGACTGGCGGACGGCTCACACGCTATCTCCAAAAATCTACACTTCCGGAGCTAGGCTCCATGACAGGGATGTTCTTATTCAACAAGATGGAGTGCTAATATTAACAATCGACGACACAGGTGAAAAACCTCTTGTCATGTAGCTAGCAGTTATGCGGGCGTACACACTAGTGCTAATCTACTAATCATCCACGGGCCCCTGCTAGAGATCCAAAGGGGT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test4_rep3.fa	Tue Jan 24 12:32:09 2023 +0000
@@ -0,0 +1,4 @@
+>random_sequence_length_500_1
+AGAGGGAGCCTTGCGTCGGTGCTCCACGTCGTAAAGCCGTTGAGACCGCGATTACTTTGTTTATTTTCAATTCACTTGTCACAATCAGAAGGACACCGGCGACGGCCAGTTTCCATCGCCCGTCCAGGGTATCGGGGTACCTCTCTACTCGAACGTCTGCGGGGGTTTATCGGGTCTTTACAGGGGGACCAAATGGTTGGACACGGTACCTCAACGCGATAATATATGTCCTGAGTGTGATCATCGGCTGCTTGATTGACTAGTAAGTAAATAACTCCGCCCGCCGAAAGTGACCATTCTAGGAAAATACCCTCTAGCCTCTTGCTTTTGTACGGACTCACTTTCAACGTCCAGGGGTTGCGAACTAATACAGTGAAAGCGATGACCGACAGTAGCAGTTGAGTCTCCCAATGCACCGGTCCCTAACCTCAACCCGGCGTCTAGTCTGACCAGCCTATACAGATAGCAAACAACGTGGACGCGACGGACATGACGAAATA
+>random_sequence_length_500_2
+GCTACTTTACCGTGCGGAAGTAGGTGGGTAGGCCCACATCCTCGTCCTTCAGTGGGCACCCATCTCTCTAAGTACATCTCAAGTGAGGAGGGCTGAGAAAATTGGACGATCTAGTGGAAGCGCGCCGAAATATGGCCTGAGTGAGATCGACCCCGAGGAGCGAGCTCGTTTTCCGAAGTTCGTTATGAGTATGGCGTTCGTTGCTGGCCGAGTACCCCCTGGTGACGTAAAGTGTTTATTTACACAGCTACTTCTCCGAACCAACGACTTATATGTGGGTCCCTGATTGCCTCCCATAGGTCCGGCATAGTTAAGAAAGTTAACGGAACCAAAGTCCAGTACATGGAGTTCTATGATAGACAGCTGTCTCCATTCCCGTATCTGCCAAAGAGATTAGATCCTAGTTGATCCCAGCAGCTACTCGTAATGACAGGATCCGGCGTGTCACTATACGACGCTTGCGGGAGGATGGTCGCCCTGCCCGTACCGTTACTTAGATC
--- a/test-data/test5.tsv	Tue Apr 26 15:25:04 2022 +0000
+++ b/test-data/test5.tsv	Tue Jan 24 12:32:09 2023 +0000
@@ -0,0 +1,2 @@
+Genome	2seqs.bad_read.1.with_supplementary Read Count
+genome1	20