Mercurial > repos > bebatut > format_cd_hit_output
diff format_cd_hit_output.xml @ 0:bbd903996900 draft
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_cd_hit_output/ commit ffb68b2ddd94854a34a2533105f7bc08884c6e38-dirty
author | bebatut |
---|---|
date | Wed, 27 Jan 2016 03:28:42 -0500 |
parents | |
children | 4ba41bcee051 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/format_cd_hit_output.xml Wed Jan 27 03:28:42 2016 -0500
@@ -0,0 +1,129 @@
+<tool id="format_cd_hit_output" name="Format cd-hit outputs" version="0.1.0">
+    <!-- No dependency is declared here; the command below relies on a "python" executable being available on the PATH. -->
+    <requirements>
+    </requirements>
+
+    <!-- Any exit code of 1 or above is reported as a tool error. -->
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+
+    <!-- Command used to record the version of the interpreter that runs the script. -->
+    <version_command><![CDATA[python --version]]></version_command>
+
+    <!--
+        Cheetah template for the command line. The cluster information file is always
+        passed; each optional step adds its own arguments only when the matching
+        boolean in the inputs section is checked. Paths are single-quoted so that
+        unusual characters in them are not interpreted by the shell.
+    -->
+    <command><![CDATA[
+        python '$__tool_directory__/format_cd_hit_output.py'
+            --input_cluster_info '$input_cluster_info'
+
+            #if $rename_representative_sequences.test
+                --input_representative_sequences '$rename_representative_sequences.input_representative_sequences'
+                --output_representative_sequences '$output_representative_sequences'
+            #end if
+
+            #if $extract_category_distribution.test
+                --input_mapping '$extract_category_distribution.input_mapping'
+                --output_category_distribution '$output_category_distribution'
+                --number_sum $extract_category_distribution.number_sum
+            #end if
+    ]]>
+    </command>
+
+    <inputs>
+        <param type="data" format="txt" name="input_cluster_info" label="Cluster info"/>
+
+        <!-- Optional step 1: rename the representative sequences. -->
+        <conditional name="rename_representative_sequences">
+            <param name="test" type="boolean" checked="true"
+                label="Rename representative sequences with the corresponding cluster name?" help=""/>
+            <when value="true">
+                <param type="data" format="fasta" name="input_representative_sequences"
+                    label="Representative sequences"/>
+            </when>
+            <when value="false" />
+        </conditional>
+
+        <!-- Optional step 2: compute the category distribution of each cluster. -->
+        <conditional name="extract_category_distribution">
+            <param name="test" type="boolean" checked="true"
+                label="Extract category distribution of each cluster?" help=""/>
+            <when value="true">
+                <param type="data" format="tabular" name="input_mapping" label="Mapping file"
+                    help="The mapping file is a tabular file with 2 columns. First column contains the sequence names and the second one the corresponding category"/>
+                <param name="number_sum" type="boolean" checked="true"
+                    label="Sum sequence number for each category?"
+                    help="The alternative is the sum of size for sequences in each category (if the size information is available in sequence name)"/>
+            </when>
+            <when value="false" />
+        </conditional>
+    </inputs>
+
+    <!-- Each output is kept only when the boolean of its conditional is checked. -->
+    <outputs>
+        <data name="output_representative_sequences" format="fasta"
+            label="${tool.name} on ${on_string}: Renamed representative sequences">
+            <filter>rename_representative_sequences['test']</filter>
+        </data>
+        <data name="output_category_distribution" format="tabular"
+            label="${tool.name} on ${on_string}: Category distribution">
+            <filter>extract_category_distribution['test']</filter>
+        </data>
+    </outputs>
+
+    <tests>
+        <!-- Parameters nested in a conditional are addressed as "conditional|param". -->
+        <test>
+            <param name="input_cluster_info" value="input_cluster_info.txt"/>
+            <param name="rename_representative_sequences|test" value="true"/>
+            <param name="rename_representative_sequences|input_representative_sequences"
+                value="input_representative_sequences.fasta"/>
+            <param name="extract_category_distribution|test" value="true"/>
+            <param name="extract_category_distribution|input_mapping" value="input_mapping.txt"/>
+            <output name="output_representative_sequences"
+                file="output_representative_sequences.fasta"/>
+            <output name="output_category_distribution"
+                file="output_category_distribution.txt"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+
+**What it does**
+
+This tool formats cd-hit outputs (cluster information and cluster representative
+sequences) to rename the representative sequences with cluster name and/or extract
+category distribution of each cluster
+
+-----
+
+**Inputs**
+
+The tool takes as input:
+
+    - The cd-hit output file with cluster information
+    - The cd-hit output file with representative sequences for each cluster (optional)
+    - A mapping file in tabular format with first column being the sequence names
+      (corresponding to the ones in cluster information file) and the second column being
+      the corresponding categories (optional)
+
+-----
+
+**Outputs**
+
+The tool generates different outputs given the chosen parameters:
+
+    - A file with representative sequences of each cluster named with the cluster name
+    - A tabular file with lines corresponding to clusters, columns to categories (and
+      one column with sequence number in the cluster), and cells to number of sequences
+      of the given category in the cluster
+
+]]>
+    </help>
+
+    <citations></citations>
+</tool>
\ No newline at end of file