Mercurial > repos > gordon > unix_tools
comparison multijoin.xml @ 0:631dfde45073 draft default tip
First tool-shed public version
| author | gordon |
|---|---|
| date | Tue, 09 Oct 2012 18:48:06 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:631dfde45073 |
|---|---|
| 1 <tool id="cshl_multijoin" name="Multi-Join" version="0.1.1"> | |
| 2 <description>(combine multiple files)</description> | |
| 3 <!-- Runs the multijoin script through perl; its stdout is redirected to $output. Option values are single-quoted, each boolean param expands to its truevalue flag or to an empty string, and one quoted path is appended per entry of the "files" repeat. --> <command interpreter="perl">multijoin | |
| 4 --key '$key_column' | |
| 5 --values '$value_columns' | |
| 6 --filler '$filler' | |
| 7 $ignore_dups | |
| 8 $output_header | |
| 9 $input_header | |
| 10 #for $file in $files | |
| 11 '$file.filename' | |
| 12 #end for | |
| 13 > '$output' | |
| 14 </command> | |
| 15 <!-- User-facing parameters: a repeatable list of files, the key column, the value columns, three on/off flags and the filler for missing fields. The two free-text params strip single quotes because the command wraps their values in single quotes. --> | |
| 16 <inputs> | |
| 17 <repeat title="file to join" name="files"> | |
| 18 <param name="filename" type="data" format="txt" label="Add file" /> | |
| 19 </repeat> | |
| 20 | |
| 21 <param name="key_column" type="integer" value="1" | |
| 22 label="Common key column" help="Usually gene-ID or other common value" /> | |
| 23 | |
| 24 <param name="value_columns" type="text" value="2,3,4" | |
| 25 label="Column with values to join" help="Enter comma-separated list of columns, e.g. 3,6,8"> | |
| 26 <sanitizer> | |
| 27 <valid initial="string.printable"> | |
| 28 <remove value="'"/> | |
| 29 </valid> | |
| 30 </sanitizer> | |
| 31 </param> | |
| 32 | |
| 33 <param name="output_header" type="boolean" truevalue="--out-header" falsevalue="" checked="false" label="Add header line to the output file" help="" /> | |
| 34 | |
| 35 <param name="input_header" type="boolean" truevalue="--in-header" falsevalue="" checked="false" label="Input files contain a header line (as first line)" help="" /> | |
| 36 | |
| 37 <param name="ignore_dups" type="boolean" truevalue="--ignore-dups" falsevalue="" checked="false" label="Ignore duplicated keys" help="If not set, duplicated keys in the same file will cause an error." /> | |
| 38 | |
| 39 <param name="filler" type="text" value="0" size="20" label="Value to put in unpaired (empty) fields"> | |
| 40 <sanitizer> | |
| 41 <valid initial="string.printable"> | |
| 42 <remove value="'"/> | |
| 43 </valid> | |
| 44 </sanitizer> | |
| 45 </param> | |
| 46 | |
| 47 </inputs> | |
| 48 <!-- The joined result is a new table (key column plus the selected value columns of every file), so it is declared as tabular. No input metadata is copied: the previous metadata_source pointed at "input1", a parameter this tool does not define. --> <outputs> | |
| 49 <data name="output" format="tabular" /> | |
| 50 </outputs> | |
| 51 | |
| 52 <help> | |
| 53 **What it does** | |
| 54 | |
| 55 This tool joins multiple tabular files based on a common key column. | |
| 56 | |
| 57 ----- | |
| 58 | |
| 59 **Example** | |
| 60 | |
| 61 To join three files on the 4th column, keeping the 7th, 8th and 9th columns: | |
| 62 | |
| 63 **First file (AAA)**:: | |
| 64 | |
| 65 chr4 888449 890171 FBtr0308778 0 + 266 1527 1722 | |
| 66 chr4 972167 979017 FBtr0310651 0 - 3944 6428 6850 | |
| 67 chr4 972186 979017 FBtr0089229 0 - 3944 6428 6831 | |
| 68 chr4 972186 979017 FBtr0089231 0 - 3944 6428 6831 | |
| 69 chr4 972186 979017 FBtr0089233 0 - 3944 6428 6831 | |
| 70 chr4 995793 996435 FBtr0111046 0 + 7 166 642 | |
| 71 chr4 995793 997931 FBtr0111044 0 + 28 683 2138 | |
| 72 chr4 995793 997931 FBtr0111045 0 + 28 683 2138 | |
| 73 chr4 1034029 1047719 FBtr0089223 0 - 5293 13394 13690 | |
| 74 ... | |
| 75 | |
| 76 | |
| 77 **Second file (BBB)**:: | |
| 78 | |
| 79 chr4 90286 134453 FBtr0309803 0 + 657 29084 44167 | |
| 80 chr4 251355 266499 FBtr0089116 0 + 56 1296 15144 | |
| 81 chr4 252050 266506 FBtr0308086 0 + 56 1296 14456 | |
| 82 chr4 252050 266506 FBtr0308087 0 + 56 1296 14456 | |
| 83 chr4 252053 266528 FBtr0300796 0 + 56 1296 14475 | |
| 84 chr4 252053 266528 FBtr0300800 0 + 56 1296 14475 | |
| 85 chr4 252055 266528 FBtr0300798 0 + 56 1296 14473 | |
| 86 chr4 252055 266528 FBtr0300799 0 + 56 1296 14473 | |
| 87 chr4 252541 266528 FBtr0300797 0 + 56 1296 13987 | |
| 88 ... | |
| 89 | |
| 90 **Third file (CCC)**:: | |
| 91 | |
| 92 chr4 972167 979017 FBtr0310651 0 - 9927 6738 6850 | |
| 93 chr4 972186 979017 FBtr0089229 0 - 9927 6738 6831 | |
| 94 chr4 972186 979017 FBtr0089231 0 - 9927 6738 6831 | |
| 95 chr4 972186 979017 FBtr0089233 0 - 9927 6738 6831 | |
| 96 chr4 995793 996435 FBtr0111046 0 + 5 304 642 | |
| 97 chr4 995793 997931 FBtr0111044 0 + 17 714 2138 | |
| 98 chr4 995793 997931 FBtr0111045 0 + 17 714 2138 | |
| 99 chr4 1034029 1047719 FBtr0089223 0 - 17646 13536 13690 | |
| 100 ... | |
| 101 | |
| 102 | |
| 103 **Joining** the files, using **key column 4**, **value columns 7,8,9** and a **header line**, will return:: | |
| 104 | |
| 105 key AAA__V7 AAA__V8 AAA__V9 BBB__V7 BBB__V8 BBB__V9 CCC__V7 CCC__V8 CCC__V9 | |
| 106 FBtr0089116 0 0 0 56 1296 15144 0 0 0 | |
| 107 FBtr0089223 5293 13394 13690 0 0 0 17646 13536 13690 | |
| 108 FBtr0089229 3944 6428 6831 0 0 0 9927 6738 6831 | |
| 109 FBtr0089231 3944 6428 6831 0 0 0 9927 6738 6831 | |
| 110 FBtr0089233 3944 6428 6831 0 0 0 9927 6738 6831 | |
| 111 FBtr0111044 28 683 2138 0 0 0 17 714 2138 | |
| 112 FBtr0111045 28 683 2138 0 0 0 17 714 2138 | |
| 113 FBtr0111046 7 166 642 0 0 0 5 304 642 | |
| 114 FBtr0300796 0 0 0 56 1296 14475 0 0 0 | |
| 115 ... | |
| 116 | |
| 117 | |
| 118 **Note:** Input files need not be sorted. | |
| 119 | |
| 120 ----- | |
| 121 | |
| 122 *multijoin* was written by A. Gordon (gordon at cshl dot edu) | |
| 123 | |
| 124 </help> | |
| 125 </tool> |
