Mercurial > repos > bgruening > split_file_on_column
changeset 0:f30aca50efbb draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_on_column commit 36d05738e78c68091b45779624734e6a47829856-dirty
author | bgruening |
---|---|
date | Wed, 26 Aug 2015 07:31:55 -0400 |
parents | |
children | b1914e537f3e |
files | split_file_on_column.tar.gz split_file_on_column.xml test-data/5cols.tabular tool_dependencies.xml |
diffstat | 4 files changed, 85 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/split_file_on_column.xml Wed Aug 26 07:31:55 2015 -0400
@@ -0,0 +1,74 @@
+<tool id="tp_split_on_column" name="Split file" version="0.2">
+    <description>according to the values of a column</description>
+    <requirements>
+        <requirement type="package" version="4.1.0">gnu_awk</requirement>
+    </requirements>
+    <!-- Every row is written to out/<column value>.<input extension>. "\$" is the Cheetah escape for a
+         literal "$", so "\$${column}" reaches awk as a field reference such as $5. -->
+    <command><![CDATA[
+        mkdir ./out && awk -F'\t' '{ print > ("./out/" \$${column} ".${infile.ext}") }' '${infile}'
+    ]]></command>
+    <inputs>
+        <param format="tabular" name="infile" type="data" label="File to select" />
+        <param name="column" label="on column" type="data_column" data_ref="infile" accept_default="true" />
+    </inputs>
+    <!-- One list element per file found in out/; only that directory is scanned, not the whole job directory. -->
+    <outputs>
+        <collection name="split_output" type="list" label="Table split on the selected column">
+            <discover_datasets pattern="__name_and_ext__" directory="out" />
+        </collection>
+    </outputs>
+    <!-- Superseded single-dataset output, kept for reference only: <outputs>
+        <data format="input" name="outfile" metadata_source="infile" label="${tool.name} on ${on_string}"/>
+    </outputs> -->
+    <!-- Splits the bundled 5-column sample on column 5 and checks one row in each of the two elements. -->
+    <tests>
+        <test>
+            <param name="infile" value="5cols.tabular" ftype="tabular"/>
+            <param name="column" value="5" />
+            <output_collection name="split_output" type="list">
+                <element name="1">
+                    <assert_contents>
+                        <has_text_matching expression="chr7\t56632\t56652\tcluster\t1" />
+                    </assert_contents>
+                </element>
+                <element name="2">
+                    <assert_contents>
+                        <has_text_matching expression="chr7\t56761\t56781\tcluster\t2" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+**What it does**
+
+This tool splits a file into different smaller files using a specific column.
+It will work like the group tool, but every group is saved to its own file.
+
+-----
+
+**Example**
+
+Splitting on column 5 from this::
+
+    chr7 56632 56652 cluster 1
+    chr7 56736 56756 cluster 1
+    chr7 56761 56781 cluster 2
+    chr7 56772 56792 cluster 2
+    chr7 56775 56795 cluster 2
+
+will produce 2 files with different clusters::
+
+    chr7 56632 56652 cluster 1
+    chr7 56736 56756 cluster 1
+
+
+    chr7 56761 56781 cluster 2
+    chr7 56772 56792 cluster 2
+    chr7 56775 56795 cluster 2
+
+
+]]></help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/5cols.tabular Wed Aug 26 07:31:55 2015 -0400 @@ -0,0 +1,5 @@ +chr7 56632 56652 cluster 1 +chr7 56736 56756 cluster 1 +chr7 56761 56781 cluster 2 +chr7 56772 56792 cluster 2 +chr7 56775 56795 cluster 2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Wed Aug 26 07:31:55 2015 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency> <!-- gnu_awk 4.1.0 is taken from the package repository named below, pinned to one revision -->
+    <package version="4.1.0" name="gnu_awk">
+        <repository toolshed="https://testtoolshed.g2.bx.psu.edu" owner="iuc" name="package_gnu_awk_4_1_0" changeset_revision="440a5170003f"/>
+    </package>
+</tool_dependency>