Mercurial > repos > leomrtns > checkm_lineage_wf

diff checkm_lineage_wf.xml @ 0:cb30bd36bfd0 draft default tip
planemo upload
author: leomrtns
date: Wed, 02 Oct 2019 10:51:58 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/checkm_lineage_wf.xml	Wed Oct 02 10:51:58 2019 -0400
@@ -0,0 +1,122 @@
+<tool id="checkm_lineage_wf" name="checkm lineage_wf" version="@TOOL_VERSION@">
+  <description>Runs tree, lineage_set, analyze, qa</description>
+  <macros>
+    <import>macros.xml</import>
+    <import>plot_macros.xml</import>
+    <import>tool-macros.xml</import>
+  </macros>
+  <expand macro="requirements"/>
+  <expand macro="stdio"/>
+  <expand macro="version_command"/>
+  <command><![CDATA[
+echo {\"dataRoot\": \"$checkm_databases.fields.path\", \"remoteManifestURL\": \"https://data.ace.uq.edu.au/public/CheckM_databases/\", \"manifestType\": \"CheckM\", \"localManifestName\": \".dmanifest\", \"remoteManifestName\": \".dmanifest\"} > \${CONDA_PREFIX}/lib/python2.7/site-packages/checkm/DATA_CONFIG &&
+mkdir bin_folder && 
+#for $k, $f in enumerate($bin_folder)
+    #if $f
+        ln -s "${f}" bin_folder/${k}.fna &&
+    #end if
+#end for
+
+checkm lineage_wf 
+bin_folder
+'$output.extra_files_path'
+$reduced_tree
+$ali
+$nt
+$genes
+#if $unique and $unique is not None:
+--unique $unique
+#end if
+#if $multi and $multi is not None:
+--multi $multi
+#end if
+$force_domain
+$no_refinement
+$individual_markers
+$skip_adj_correction
+$skip_pseudogene_correction
+#if $aai_strain and $aai_strain is not None:
+--aai_strain $aai_strain
+#end if
+#if $alignment_file == "true":
+--alignment_file $alignment_file_output
+#end if
+$ignore_thresholds
+#if $e_value and $e_value is not None:
+--e_value $e_value
+#end if
+#if $length and $length is not None:
+--length $length
+#end if
+--tab_table
+--threads \${GALAXY_SLOTS:-1}
+> '$output'
+#if str($ali) == "--ali" or str($nt) == "--nt":
+&&if [ -d '$output.extra_files_path' ]; then
+ cp -r '$output.extra_files_path' out_folder;
+fi
+#end if
+
+## link hmmer alignments per bin in one dir to make them discoverable
+#if str($ali) == "--ali":
+&& for k in `ls out_folder/bins/`; do
+    if [ -f out_folder/bins/\$k/hmmer.tree.txt ]; then 
+        ln -s \$k/hmmer.tree.txt out_folder/bins/hmmer.tree.\$k.txt;
+    fi; 
+    if [ -f out_folder/bins/\$k/hmmer.analyze.txt ]; then 
+        ln -s \$k/hmmer.analyze.txt out_folder/bins/hmmer.analyze.\$k.txt;
+    fi; 
+done
+#end if## link nucleotide sequences per bin in one dir to make them discoverable
+#if str($nt) == "--nt":
+&& for k in `ls out_folder/bins/`; do
+    if [ -f out_folder/bins/\$k/genes.fna ]; then 
+        ln -s \$k/genes.fna out_folder/bins/\$k.genes.fna;
+    fi; 
+done
+#end if]]></command>
+  <inputs>
+    <param name="bin_folder" type="data" label="bins" multiple="true" format="fasta"/>
+    <param argument="--reduced_tree" checked="false" label="use reduced tree (requires &lt;16GB of memory) for determining lineage of each bin" name="reduced_tree" type="boolean" truevalue="--reduced_tree" falsevalue=""/>
+    <param argument="--ali" checked="false" label="generate HMMER alignment file for each bin" name="ali" type="boolean" truevalue="--ali" falsevalue=""/>
+    <param argument="--nt" checked="false" label="generate nucleotide gene sequences for each bin" name="nt" type="boolean" truevalue="--nt" falsevalue=""/>
+    <param argument="--genes" checked="false" label="bins contain genes as amino acids instead of nucleotide contigs" name="genes" type="boolean" truevalue="--genes" falsevalue=""/>
+    <param argument="--unique" label="minimum number of unique phylogenetic markers required to use lineage-specific marker set" name="unique" optional="true" type="integer" value="10"/>
+    <param argument="--multi" label="maximum number of multi-copy phylogenetic markers before defaulting to domain-level marker set" name="multi" optional="true" type="integer" value="10"/>
+    <param argument="--force_domain" checked="false" label="use domain-level sets for all bins" name="force_domain" type="boolean" truevalue="--force_domain" falsevalue=""/>
+    <param argument="--no_refinement" checked="false" label="do not perform lineage-specific marker set refinement" name="no_refinement" type="boolean" truevalue="--no_refinement" falsevalue=""/>
+    <param argument="--individual_markers" checked="false" label="treat marker as independent (i.e., ignore co-located set structure)" name="individual_markers" type="boolean" truevalue="--individual_markers" falsevalue=""/>
+    <param argument="--skip_adj_correction" checked="false" label="do not exclude adjacent marker genes when estimating contamination" name="skip_adj_correction" type="boolean" truevalue="--skip_adj_correction" falsevalue=""/>
+    <param argument="--skip_pseudogene_correction" checked="false" label="skip identification and filtering of pseudogenes" name="skip_pseudogene_correction" type="boolean" truevalue="--skip_pseudogene_correction" falsevalue=""/>
+    <param argument="--aai_strain" label="AAI threshold used to identify strain heterogeneity" name="aai_strain" optional="true" type="float" value="0.9"/>
+    <param argument="--alignment_file" label="produce file showing alignment of multi-copy genes and their AAI identity" name="alignment_file" type="boolean" truevalue="true" falsevalue="false" checked="false" />
+    <param argument="--ignore_thresholds" checked="false" label="ignore model-specific score thresholds" name="ignore_thresholds" type="boolean" truevalue="--ignore_thresholds" falsevalue=""/>
+    <param argument="--e_value" label="e-value cut off" name="e_value" optional="true" type="float" value="1e-10"/>
+    <param argument="--length" label="percent overlap between target and query" name="length" optional="true" type="float" value="0.7"/>
+
+    <param label="Select a database" name="checkm_databases" type="select">
+      <options from_data_table="checkm_databases">
+        <validator message="No database is available" type="no_options" />
+      </options>
+    </param>
+
+
+  </inputs>
+  <outputs>
+    <data  name="alignment_file_output" format="txt" hidden="false" label="${tool.name} on ${on_string} (alignment)"><filter>alignment_file</filter></data>
+    <collection name="hmmer_nucleotide_per_bin" type="list" label="${tool.name} on ${on_string} (nucleotide gene sequences per bin)">
+      <filter>nt</filter>
+      <discover_datasets directory="out_folder/bins/" pattern="(?P&lt;designation&gt;.+).genes.fna" ext="fasta" />
+    </collection>
+    <collection name="hmmer_alignment_per_bin" type="list" label="${tool.name} on ${on_string} (HMMER alignments per bin)">
+      <filter>ali</filter>
+      <discover_datasets directory="out_folder/bins/" pattern="hmmer.(?P&lt;designation&gt;.+).txt" ext="txt" />
+    </collection>
+    <data name="output" format="tabular" hidden="false" label="${tool.name} on ${on_string}"/>
+  </outputs>
+  <tests>
+  <expand macro="tests"/>
+  </tests>
+  <help><![CDATA[Example: checkm lineage_wf ./bins ./output]]></help>
+  <expand macro="citations"/>
+</tool>
author	leomrtns
date	Wed, 02 Oct 2019 10:51:58 -0400
parents
children