comparison split_tabular_to_collection.xml @ 0:f6254e4e155e draft default tip

planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/split_to_collection commit b2ce04dd96d8b00103c23b58a4c6539a6b30809a-dirty
author jjohnson
date Thu, 26 Oct 2017 13:32:38 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:f6254e4e155e
1 <tool id="split_tabular_to_collection" name="Split Tabular into Collection" version="0.1.0">
2 <description>by lines</description>
3 <!-- Splits the input into chunk files of "lines" lines each, written relative to the current directory. Each file is named (input.name)_(N), where N is the zero-based index of the chunk's first retained line, zero-padded to the digit count of metadata.data_lines. When skip_comment_lines is set, the first metadata.comment_lines lines are not written. awk closes each finished chunk file so only one output file is open at a time, and the dataset path is single-quoted for the shell. NOTE(review): input.name is interpolated unescaped into the awk program; names containing quotes or backslashes would presumably break it, confirm whether sanitising is needed. --> <command><![CDATA[
4 #set $width = len(str($input.dataset.metadata.data_lines))
5 #if $skip_comment_lines:
6 #set $skip = $input.dataset.metadata.comment_lines
7 #else
8 #set $skip = 0
9 #end if
10 #set $offset = $skip + 1
11 awk 'NR > $skip { chunk = sprintf("%s_%.${width}d", "${input.name}", int((NR-${offset})/${lines})*${lines}); if (chunk != open_chunk) { if (open_chunk != "") close(open_chunk); open_chunk = chunk } print \$0 > chunk }' '$input'
12 ]]></command>
13 <inputs> <!-- Parameters read by the command template: input, lines, skip_comment_lines. -->
14 <param name="input" type="data" format="tabular" label="Tabular dataset to split"/>
15 <param name="lines" type="integer" value="1000" min="1" label="Number of lines per output dataset"/> <!-- chunk size; min="1" keeps the divisor used in the command non-zero -->
16 <param name="skip_comment_lines" type="boolean" truevalue="yes" falsevalue="no" checked="true"
17 label="Skip comment lines"/> <!-- when set, the command drops the first metadata.comment_lines lines of the input -->
18 </inputs>
19 <outputs> <!-- A single list collection; its members are discovered after the job rather than declared one by one. -->
20 <collection name="output_set" type="list" label="${input.name} Split List">
21 <discover_datasets pattern="__name__" ext="tabular" visible="false"/> <!-- NOTE(review): no directory attribute is given, so discovery presumably scans the job working directory and uses each file name as the element identifier; confirm nothing else is written there -->
22 </collection>
23 </outputs>
24 <tests> <!-- Splits input.tsv into 20-line chunks and checks one row on each side of the first chunk boundary. Assumes input.tsv rows are numbered consecutively from 1; confirm against test-data. -->
25 <test>
26 <param name="input" value="input.tsv" ftype="tabular"/>
27 <param name="lines" value="20"/>
28 <output_collection name="output_set" type="list">
29 <element name="input.tsv_00"> <!-- first chunk: retained lines with zero-based index 0 to 19 -->
30 <assert_contents>
31 <has_text_matching expression="20\tt\tT" />
32 </assert_contents>
33 </element>
34 <element name="input.tsv_20"> <!-- second chunk: named after zero-based index 20, its first retained line; this entry previously duplicated the name input.tsv_00 -->
35 <assert_contents>
36 <has_text_matching expression="21\tu\tU" />
37 </assert_contents>
38 </element>
39 </output_collection>
40 </test>
41 </tests>
42 <help><![CDATA[
43 Splits a tabular dataset into multiple datasets in a dataset collection.
44 This can be used in a workflow to process datasets in the collection in parallel.
45
46 ]]></help>
47 </tool>