Mercurial > repos > jjohnson > split_to_collection
view split_tabular_to_collection.xml @ 0:f6254e4e155e draft default tip
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/split_to_collection commit b2ce04dd96d8b00103c23b58a4c6539a6b30809a-dirty
author | jjohnson |
---|---|
date | Thu, 26 Oct 2017 13:32:38 -0400 |
parents | |
children |
line wrap: on
line source
<tool id="split_tabular_to_collection" name="Split Tabular into Collection" version="0.1.0">
    <description>by lines</description>
    <!--
        Splits the input with awk into files written to the job working
        directory; every file found there becomes one element of the output
        list collection (see discover_datasets below).
    -->
    <command><![CDATA[
        #import re
        ## Zero-pad the numeric suffix to the number of digits in the data line
        ## count so that the generated names sort in file order.
        #set $width = len(str($input.dataset.metadata.data_lines))
        #if $skip_comment_lines
            #set $skip = $input.dataset.metadata.comment_lines
        #else
            #set $skip = 0
        #end if
        #set $offset = $skip + 1
        ## The dataset name is user supplied and ends up inside a quoted awk
        ## program and in file names: replace anything that is not a word
        ## character, dot, space or hyphen so it cannot break the quoting or
        ## point outside the working directory.
        #set $prefix = re.sub('[^\w. -]', '_', str($input.name))
        ## Pieces are written one after another, so the previous piece is
        ## closed as soon as the next one starts; this keeps the number of
        ## open files at one regardless of how many pieces are produced.
        ## Statements are separated by semicolons so the program stays valid
        ## if the command is collapsed onto a single line.
        awk 'NR > ${skip} {
            piece = sprintf("%s_%.${width}d", "${prefix}", int((NR - ${offset}) / ${lines}) * ${lines});
            if (open_piece != "" && piece != open_piece) close(open_piece);
            open_piece = piece;
            print \$0 > piece
        }' '$input'
    ]]></command>
    <inputs>
        <param name="input" type="data" format="tabular" label="Tabular dataset to split"/>
        <param name="lines" type="integer" value="1000" min="1" label="Number of lines per output dataset"/>
        <param name="skip_comment_lines" type="boolean" truevalue="yes" falsevalue="no" checked="true" label="Skip comment lines"/>
    </inputs>
    <outputs>
        <collection name="output_set" type="list" label="${input.name} Split List">
            <discover_datasets pattern="__name__" ext="tabular" visible="false"/>
        </collection>
    </outputs>
    <tests>
        <test>
            <param name="input" value="input.tsv" ftype="tabular"/>
            <param name="lines" value="20"/>
            <output_collection name="output_set" type="list">
                <!-- First piece: data lines 1-20. -->
                <element name="input.tsv_00">
                    <assert_contents>
                        <has_text_matching expression="20\tt\tT" />
                    </assert_contents>
                </element>
                <!-- Second piece starts at offset 20, hence the _20 suffix. -->
                <element name="input.tsv_20">
                    <assert_contents>
                        <has_text_matching expression="21\tu\tU" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
Splits a tabular dataset into multiple datasets in a dataset collection.
This can be used in a workflow to process datasets in the collection in parallel.

Each collection element is named after the input dataset followed by an
underscore and the zero-padded offset of the first line it contains
(for example ``input.tsv_00``, ``input.tsv_20`` when splitting every 20 lines).
Characters in the dataset name other than letters, digits, underscore, dot,
space and hyphen are replaced by an underscore in the element names.
    ]]></help>
</tool>