annotate multijoin.xml @ 5:3f0e0d4c15a9 draft

Uploaded
author bgruening
date Wed, 07 Jan 2015 11:15:41 -0500
parents 56e80527c482
children 8928e6d1e7ba
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
1 <tool id="tp_multijoin_tool" name="Multi-Join" version="@BASE_VERSION@.0">
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
2 <description>(combine multiple files)</description>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
3 <macros>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
4 <import>macros.xml</import>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
5 </macros>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
6 <expand macro="requirements">
5
3f0e0d4c15a9 Uploaded
bgruening
parents: 4
diff changeset
7 <requirement type="package" version="5.18.1">perl</requirement>
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
8 <requirement type="package" version="1.0">text_processing_perl_packages</requirement>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
9 </expand>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
10 <command interpreter="perl">
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
11 <![CDATA[
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
12 multijoin
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
13 --key '$key_column'
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
14 --values '$value_columns'
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
15 --filler '$filler'
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
16 $ignore_dups
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
17 $output_header
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
18 $input_header
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
19 '$first_file'
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
20 #for $file in $files:
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
21 '$file'
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
22 #end for
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
23 > '$outfile'
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
24 ]]>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
25 </command>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
26 <inputs>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
27 <param name="first_file" type="data" format="txt" label="File to join"/>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
28 <param name="files" multiple="True" type="data" format="txt" label="add additional file" />
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
29
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
30 <param name="key_column" label="Common key column" type="integer"
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
31 value="1" help="Usually gene-ID or other common value" />
2
fc862d5bccaf Uploaded
bgruening
parents: 1
diff changeset
32
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
33 <param name="value_columns" label="Column with values to preserve"
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
34 type="data_column" data_ref="first_file" accept_default="true" multiple="True" display="checkboxes"/>
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
35
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
36 <param name="output_header" type="boolean" checked="false" truevalue="--out-header" falsevalue="" label="Add header line to the output file" help="" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
37 <param name="input_header" type="boolean" checked="false" truevalue="--in-header" falsevalue="" label="Input files contain a header line (as first line)" help="" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
38 <param name="ignore_dups" type="boolean" checked="false" truevalue="--ignore-dups" falsevalue="" label="Ignore duplicated keys" help="If not set, duplicated keys in the same file will cause an error." />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
39 <param name="filler" type="text" size="20" value="0" label="Value to put in unpaired (empty) fields">
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
40 <sanitizer>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
41 <valid initial="string.printable">
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
42 <remove value="&apos;"/>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
43 </valid>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
44 </sanitizer>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
45 </param>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
46 </inputs>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
47 <outputs>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
48 <data name="outfile" format_source="first_file" metadata_source="first_file" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
49 </outputs>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
50 <tests>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
51 <test>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
52 <param name="first_file" value="multijoin1.txt" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
53 <param name="files" value="multijoin2.txt,multijoin3.txt" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
54 <param name="key_column" value="4" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
55 <param name="value_columns" value="c7,c8,c9" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
56 <param name="output_header" value="True" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
57 <output name="outfile" file="multijoin_result1.txt" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
58 </test>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
59 </tests>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
60 <help>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
61 <![CDATA[
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
62 **What it does**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
63
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
64 This tool joins multiple tabular files based on a common key column.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
65
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
66 -----
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
67
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
68 **Example**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
69
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
70 To join three files, based on the 4th column, and keeping the 7th,8th,9th columns:
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
71
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
72 **First file (AAA)**::
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
73
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
74 chr4 888449 890171 FBtr0308778 0 + 266 1527 1722
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
75 chr4 972167 979017 FBtr0310651 0 - 3944 6428 6850
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
76 chr4 972186 979017 FBtr0089229 0 - 3944 6428 6831
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
77 chr4 972186 979017 FBtr0089231 0 - 3944 6428 6831
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
78 chr4 972186 979017 FBtr0089233 0 - 3944 6428 6831
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
79 chr4 995793 996435 FBtr0111046 0 + 7 166 642
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
80 chr4 995793 997931 FBtr0111044 0 + 28 683 2138
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
81 chr4 995793 997931 FBtr0111045 0 + 28 683 2138
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
82 chr4 1034029 1047719 FBtr0089223 0 - 5293 13394 13690
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
83 ...
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
84
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
85
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
86 **Second file (BBB)**::
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
87
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
88 chr4 90286 134453 FBtr0309803 0 + 657 29084 44167
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
89 chr4 251355 266499 FBtr0089116 0 + 56 1296 15144
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
90 chr4 252050 266506 FBtr0308086 0 + 56 1296 14456
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
91 chr4 252050 266506 FBtr0308087 0 + 56 1296 14456
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
92 chr4 252053 266528 FBtr0300796 0 + 56 1296 14475
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
93 chr4 252053 266528 FBtr0300800 0 + 56 1296 14475
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
94 chr4 252055 266528 FBtr0300798 0 + 56 1296 14473
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
95 chr4 252055 266528 FBtr0300799 0 + 56 1296 14473
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
96 chr4 252541 266528 FBtr0300797 0 + 56 1296 13987
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
97 ...
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
98
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
99 **Third file (CCC)**::
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
100
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
101 chr4 972167 979017 FBtr0310651 0 - 9927 6738 6850
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
102 chr4 972186 979017 FBtr0089229 0 - 9927 6738 6831
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
103 chr4 972186 979017 FBtr0089231 0 - 9927 6738 6831
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
104 chr4 972186 979017 FBtr0089233 0 - 9927 6738 6831
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
105 chr4 995793 996435 FBtr0111046 0 + 5 304 642
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
106 chr4 995793 997931 FBtr0111044 0 + 17 714 2138
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
107 chr4 995793 997931 FBtr0111045 0 + 17 714 2138
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
108 chr4 1034029 1047719 FBtr0089223 0 - 17646 13536 13690
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
109 ...
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
110
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
111
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
112 **Joining** the files, using **key column 4**, **value columns 7,8,9** and a **header line**, will return::
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
113
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
114 key AAA__V7 AAA__V8 AAA__V9 BBB__V7 BBB__V8 BBB__V9 CCC__V7 CCC__V8 CCC__V9
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
115 FBtr0089116 0 0 0 56 1296 15144 0 0 0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
116 FBtr0089223 5293 13394 13690 0 0 0 17646 13536 13690
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
117 FBtr0089229 3944 6428 6831 0 0 0 9927 6738 6831
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
118 FBtr0089231 3944 6428 6831 0 0 0 9927 6738 6831
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
119 FBtr0089233 3944 6428 6831 0 0 0 9927 6738 6831
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
120 FBtr0111044 28 683 2138 0 0 0 17 714 2138
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
121 FBtr0111045 28 683 2138 0 0 0 17 714 2138
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
122 FBtr0111046 7 166 642 0 0 0 5 304 642
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
123 FBtr0300796 0 0 0 56 1296 14475 0 0 0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
124 ...
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
125
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
126 .. class:: infomark
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
127
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
128 Input files need not be sorted.
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
129
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
130 @REFERENCES@
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
131 ]]>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
132 </help>
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
133 </tool>