annotate multijoin.xml @ 0:631dfde45073 draft default tip

First tool-shed public version
author gordon
date Tue, 09 Oct 2012 18:48:06 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
1 <tool id="cshl_multijoin" name="Multi-Join" version="0.1.1">
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
2 <description>(combine multiple files)</description>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
3 <command interpreter="perl">multijoin
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
4 --key '$key_column'
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
5 --values '$value_columns'
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
6 --filler '$filler'
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
7 $ignore_dups
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
8 $output_header
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
9 $input_header
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
10 #for $file in $files
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
11 '$file.filename'
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
12 #end for
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
13 &gt; '$output'
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
14 </command>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
15
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
16 <inputs>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
17 <repeat name="files" title="file to join">
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
18 <param name="filename" label="Add file" type="data" format="txt" />
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
19 </repeat>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
20
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
21 <param name="key_column" label="Common key column" type="integer"
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
22 value="1" help="Usually gene-ID or other common value" />
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
23
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
24 <param name="value_columns" label="Column with values to join" type="text"
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
25 value="2,3,4" help="Enter comma-separated list of columns, e.g. 3,6,8">
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
26 <sanitizer>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
27 <valid initial="string.printable">
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
28 <remove value="&apos;"/>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
29 </valid>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
30 </sanitizer>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
31 </param>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
32
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
33 <param name="output_header" type="boolean" checked="false" truevalue="--out-header" falsevalue="" label="Add header line to the output file" help="" />
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
34
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
35 <param name="input_header" type="boolean" checked="false" truevalue="--in-header" falsevalue="" label="Input files contain a header line (as first line)" help="" />
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
36
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
37 <param name="ignore_dups" type="boolean" checked="false" truevalue="--ignore-dups" falsevalue="" label="Ignore duplicated keys" help="If not set, duplicated keys in the same file will cause an error." />
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
38
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
39 <param name="filler" type="text" size="20" value="0" label="Value to put in unpaired (empty) fields">
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
40 <sanitizer>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
41 <valid initial="string.printable">
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
42 <remove value="&apos;"/>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
43 </valid>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
44 </sanitizer>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
45 </param>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
46
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
47 </inputs>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
48 <outputs>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
49 <data name="output" format="input" metadata_source="input1" />
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
50 </outputs>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
51
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
52 <help>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
53 **What it does**
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
54
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
55 This tool joins multiple tabular files based on a common key column.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
56
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
57 -----
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
58
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
59 **Example**
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
60
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
61 To join three files based on the 4th column, keeping the 7th, 8th and 9th columns:
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
62
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
63 **First file (AAA)**::
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
64
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
65 chr4 888449 890171 FBtr0308778 0 + 266 1527 1722
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
66 chr4 972167 979017 FBtr0310651 0 - 3944 6428 6850
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
67 chr4 972186 979017 FBtr0089229 0 - 3944 6428 6831
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
68 chr4 972186 979017 FBtr0089231 0 - 3944 6428 6831
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
69 chr4 972186 979017 FBtr0089233 0 - 3944 6428 6831
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
70 chr4 995793 996435 FBtr0111046 0 + 7 166 642
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
71 chr4 995793 997931 FBtr0111044 0 + 28 683 2138
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
72 chr4 995793 997931 FBtr0111045 0 + 28 683 2138
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
73 chr4 1034029 1047719 FBtr0089223 0 - 5293 13394 13690
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
74 ...
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
75
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
76
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
77 **Second file (BBB)**::
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
78
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
79 chr4 90286 134453 FBtr0309803 0 + 657 29084 44167
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
80 chr4 251355 266499 FBtr0089116 0 + 56 1296 15144
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
81 chr4 252050 266506 FBtr0308086 0 + 56 1296 14456
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
82 chr4 252050 266506 FBtr0308087 0 + 56 1296 14456
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
83 chr4 252053 266528 FBtr0300796 0 + 56 1296 14475
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
84 chr4 252053 266528 FBtr0300800 0 + 56 1296 14475
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
85 chr4 252055 266528 FBtr0300798 0 + 56 1296 14473
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
86 chr4 252055 266528 FBtr0300799 0 + 56 1296 14473
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
87 chr4 252541 266528 FBtr0300797 0 + 56 1296 13987
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
88 ...
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
89
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
90 **Third file (CCC)**::
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
91
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
92 chr4 972167 979017 FBtr0310651 0 - 9927 6738 6850
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
93 chr4 972186 979017 FBtr0089229 0 - 9927 6738 6831
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
94 chr4 972186 979017 FBtr0089231 0 - 9927 6738 6831
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
95 chr4 972186 979017 FBtr0089233 0 - 9927 6738 6831
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
96 chr4 995793 996435 FBtr0111046 0 + 5 304 642
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
97 chr4 995793 997931 FBtr0111044 0 + 17 714 2138
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
98 chr4 995793 997931 FBtr0111045 0 + 17 714 2138
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
99 chr4 1034029 1047719 FBtr0089223 0 - 17646 13536 13690
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
100 ...
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
101
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
102
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
103 **Joining** the files, using **key column 4**, **value columns 7,8,9** and a **header line**, will return::
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
104
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
105 key AAA__V7 AAA__V8 AAA__V9 BBB__V7 BBB__V8 BBB__V9 CCC__V7 CCC__V8 CCC__V9
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
106 FBtr0089116 0 0 0 56 1296 15144 0 0 0
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
107 FBtr0089223 5293 13394 13690 0 0 0 17646 13536 13690
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
108 FBtr0089229 3944 6428 6831 0 0 0 9927 6738 6831
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
109 FBtr0089231 3944 6428 6831 0 0 0 9927 6738 6831
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
110 FBtr0089233 3944 6428 6831 0 0 0 9927 6738 6831
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
111 FBtr0111044 28 683 2138 0 0 0 17 714 2138
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
112 FBtr0111045 28 683 2138 0 0 0 17 714 2138
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
113 FBtr0111046 7 166 642 0 0 0 5 304 642
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
114 FBtr0300796 0 0 0 56 1296 14475 0 0 0
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
115 ...
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
116
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
117
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
118 # Input files need not be sorted.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
119
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
120 -----
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
121
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
122 *multijoin* was written by A. Gordon (gordon at cshl dot edu)
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
123
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
124 </help>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
125 </tool>