diff format_cd_hit_output.xml @ 0:bbd903996900 draft

planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_cd_hit_output/ commit ffb68b2ddd94854a34a2533105f7bc08884c6e38-dirty
author bebatut
date Wed, 27 Jan 2016 03:28:42 -0500
parents
children 4ba41bcee051
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/format_cd_hit_output.xml	Wed Jan 27 03:28:42 2016 -0500
@@ -0,0 +1,124 @@
+<tool id="format_cd_hit_output" name="Format cd-hit outputs" version="0.1.0">
+    <requirements>
+    </requirements>
+
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+
+    <version_command><![CDATA[python -version]]></version_command>
+
+    <command><![CDATA[
+        python $__tool_directory__/format_cd_hit_output.py
+            --input_cluster_info $input_cluster_info
+
+            #if $rename_representative_sequences.test
+                --input_representative_sequences $rename_representative_sequences.input_representative_sequences
+                --output_representative_sequences $output_representative_sequences
+            #end if
+
+            #if $extract_category_distribution.test
+                --input_mapping $extract_category_distribution.input_mapping
+                --output_category_distribution $output_category_distribution
+                --number_sum $extract_category_distribution.number_sum
+            #end if
+    ]]>
+    </command>
+
+    <inputs>
+        <param type="data" format="txt" name="input_cluster_info" label="Cluster
+            info"/>
+
+        <conditional name="rename_representative_sequences">
+            <param name='test' type='boolean' checked="true" 
+                label="Rename representative sequences with the 
+                corresponding cluster name?" help=""/>
+            <when value="true">
+                <param type="data" format="fasta" 
+                    name="input_representative_sequences"
+                    label="Representative sequences"/>
+            </when>
+            <when value="false" />
+        </conditional>
+
+        <conditional name="extract_category_distribution">
+            <param name='test' type='boolean' checked="true" 
+                label="Extract category distribution of each 
+                cluster?" help=""/>
+            <when value="true">
+                <param type="data" format="tabular" name="input_mapping" 
+                    label="Mapping file" help="The mapping file is a tabular 
+                    file with 2 columns. First column contains the sequence 
+                    names and the second one the corresponding category"/>
+                <param name='number_sum' type='boolean' checked="true" 
+                    label="Sum sequence number 
+                    for each category?" help="The alternative is the sum of size
+                    for sequences in each category (if the size information is
+                    available in sequence name)"/>
+            </when>
+            <when value="false" />
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <data name="output_representative_sequences" format="fasta"
+            label="${tool.name} on ${on_string}: Renamed representative sequences">
+            <filter>((rename_representative_sequences['test']))</filter>
+        </data>
+        <data name="output_category_distribution" format="tabular" 
+            label="${tool.name} on ${on_string}: Category distribution">
+            <filter>((extract_category_distribution['test']))</filter>
+        </data>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="input_representative_sequences" 
+                value="input_representative_sequences.fasta"/>
+            <param name="rename_representative_sequences.test" value="yes"/>
+            <param name="input_cluster_info" value="input_cluster_info.txt"/>
+            <param name="extract_category_distribution.test" value="yes"/>
+            <param name="input_mapping_file" value="input_mapping.txt"/>
+            <output name="output_representative_sequences" 
+                file="output_representative_sequences.fasta"/>
+            <output name="output_category_distribution" 
+                file="output_category_distribution.txt"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+
+**What it does**
+
+This tool format cd-hit outputs (cluster information and cluster representative 
+sequences) to rename the representative sequences with cluster name and/or extract 
+category distribution of each cluster
+
+-----
+
+**Inputs**
+
+The tool takes as input:
+
+ - The cd-hit output file with cluster information
+ - The cd-hit output file with representative sequences for each cluster (optional)
+ - A mapping file in tabular format with first column being the sequence names 
+   (corresponding to the ones in cluster information file) and the second column being 
+   the corresponding categories (optional)
+
+-----
+
+**Outputs**
+
+The tool generates different outputs given the choosen parameters:
+
+ - A file with representative sequences of each cluster named with the cluster name
+ - A tabular file with lines corresponding to clusters, columns to categories (and
+   one column with sequence number in the cluster), and cases to number of sequences 
+   of the given category in the cluster
+
+]]>
+    </help>
+
+    <citations></citations>
+</tool>
\ No newline at end of file