Mercurial > repos > jjohnson > split_to_collection
comparison split_tabular_to_collection.xml @ 0:f6254e4e155e draft default tip
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/split_to_collection commit b2ce04dd96d8b00103c23b58a4c6539a6b30809a-dirty
author | jjohnson |
---|---|
date | Thu, 26 Oct 2017 13:32:38 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f6254e4e155e |
---|---|
<!--
  Galaxy tool wrapper: splits a tabular dataset into consecutive chunks of a
  fixed number of lines and gathers the chunk files into a list collection.
-->
<tool id="split_tabular_to_collection" name="Split Tabular into Collection" version="0.1.0">
    <description>by lines</description>
    <command><![CDATA[
## Pad the numeric suffix to the number of digits in the data line count so
## that the generated file names all have the same length.
#set $width = len(str($input.dataset.metadata.data_lines))
## Optionally drop the leading lines that the dataset metadata counts as comments.
#if $skip_comment_lines:
#set $skip = $input.dataset.metadata.comment_lines
#else
#set $skip = 0
#end if
## Subtracting the offset makes the first kept line index 0.
#set $offset = $skip + 1
## Each kept line is written to a file named after the input dataset plus the
## zero-padded index of the first line of its chunk. The previous chunk file is
## closed as soon as a new chunk starts, so a large input does not run into the
## open-file limit of the awk implementation. Chunk indices only ever increase,
## so a closed file is never reopened.
## NOTE(review): the dataset name is inserted verbatim into the awk program; a
## name containing quote, backslash or slash characters would break the command.
## Confirm whether such names need to be sanitised.
awk 'NR > ${skip} {
    chunk = sprintf("%s_%.${width}d", "${input.name}", int((NR - ${offset}) / ${lines}) * ${lines});
    if (chunk != current) {
        if (current != "") close(current);
        current = chunk;
    }
    print \$0 > chunk;
}' '$input'
    ]]></command>
    <inputs>
        <param name="input" type="data" format="tabular" label="Tabular dataset to split"/>
        <param name="lines" type="integer" value="1000" min="1" label="Number of lines per output dataset"/>
        <param name="skip_comment_lines" type="boolean" truevalue="yes" falsevalue="no" checked="true"
               label="Skip comment lines"/>
    </inputs>
    <outputs>
        <!-- Every file written by the command becomes one element of the list. -->
        <collection name="output_set" type="list" label="${input.name} Split List">
            <discover_datasets pattern="__name__" ext="tabular" visible="false"/>
        </collection>
    </outputs>
    <tests>
        <test>
            <param name="input" value="input.tsv" ftype="tabular"/>
            <param name="lines" value="20"/>
            <output_collection name="output_set" type="list">
                <!-- First chunk: lines with index 0 to 19. -->
                <element name="input.tsv_00">
                    <assert_contents>
                        <has_text_matching expression="20\tt\tT" />
                    </assert_contents>
                </element>
                <!-- Second chunk starts at index 20, hence the suffix 20. -->
                <element name="input.tsv_20">
                    <assert_contents>
                        <has_text_matching expression="21\tu\tU" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
Splits a tabular dataset into multiple datasets in a dataset collection.
This can be used in a workflow to process datasets in the collection in parallel.

Each collection element holds at most the requested number of lines. It is named
after the input dataset, followed by an underscore and the zero-padded, zero-based
index of its first line (counted after any skipped comment lines).

    ]]></help>
</tool>