Mercurial > repos > dave > test_repository
comparison joiner.xml @ 5:b95d7c323cc9 draft default tip
Uploaded
| author | dave |
|---|---|
| date | Thu, 12 Jun 2014 10:33:20 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 4:46aee6903d8f | 5:b95d7c323cc9 |
|---|---|
| 1 <tool id="join1" name="Join two Datasets" version="2.0.2"> | |
| 2 <description>side by side on a specified field</description> | |
| 3 <command interpreter="python">join.py $input1 $input2 $field1 $field2 $out_file1 $unmatched $partial --index_depth=3 --buffer=50000000 --fill_options_file=$fill_options_file</command> | |
| 4 <inputs> | |
| 5 <param format="tabular" name="input1" type="data" label="Join"/> | |
| 6 <param name="field1" label="using column" type="data_column" data_ref="input1" /> | |
| 7 <param format="tabular" name="input2" type="data" label="with" /> | |
| 8 <param name="field2" label="and column" type="data_column" data_ref="input2" /> | |
| 9 <param name="unmatched" type="select" label="Keep lines of first input that do not join with second input"> | |
| 10 <option value="-u">Yes</option> | |
| 11 <option value="" selected="true">No</option> | |
| 12 </param> | |
| 13 <param name="partial" type="select" label="Keep lines of first input that are incomplete"> | |
| 14 <option value="-p">Yes</option> | |
| 15 <option value="" selected="true">No</option> | |
| 16 </param> | |
| 17 <conditional name="fill_empty_columns"> | |
| 18 <param name="fill_empty_columns_switch" type="select" label="Fill empty columns"> | |
| 19 <option value="no_fill" selected="True">No</option> | |
| 20 <option value="fill_empty">Yes</option> | |
| 21 </param> | |
| 22 <when value="no_fill"> | |
| 23 <!-- do nothing --> | |
| 24 </when> | |
| 25 <when value="fill_empty"> | |
| 26 <param type="select" name="fill_columns_by" label="Only fill unjoined rows"> | |
| 27 <option value="fill_unjoined_only" selected="True">Yes</option> | |
| 28 <option value="fill_all">No</option> | |
| 29 </param> | |
| 30 <conditional name="do_fill_empty_columns"> | |
| 31 <param name="column_fill_type" type="select" label="Fill Columns by"> | |
| 32 <option value="single_fill_value" selected="True">Single fill value</option> | |
| 33 <option value="fill_value_by_column">Values by column</option> | |
| 34 </param> | |
| 35 <when value="single_fill_value"> | |
| 36 <param type="text" name="fill_value" label="Fill value" value="."/> | |
| 37 </when> | |
| 38 <when value="fill_value_by_column"> | |
| 39 <repeat name="column_fill1" title="Fill Column for Input 1"> | |
| 40 <param name="column_number1" label="Column" type="data_column" data_ref="input1" /> | |
| 41 <param type="text" name="fill_value1" value="."/> | |
| 42 </repeat> | |
| 43 <repeat name="column_fill2" title="Fill Column for Input 2"> | |
| 44 <param name="column_number2" label="Column" type="data_column" data_ref="input2" /> | |
| 45 <param type="text" name="fill_value2" value="."/> | |
| 46 </repeat> | |
| 47 </when> | |
| 48 </conditional> | |
| 49 </when> | |
| 50 </conditional> | |
| 51 </inputs> | |
| 52 <configfiles> | |
| 53 <configfile name="fill_options_file"><% | |
| 54 import simplejson | |
| 55 %> | |
| 56 #set $__fill_options = {} | |
| 57 #if $fill_empty_columns['fill_empty_columns_switch'] == 'fill_empty': | |
| 58 #set $__fill_options['fill_unjoined_only'] = $fill_empty_columns['fill_columns_by'].value == 'fill_unjoined_only' | |
| 59 #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'single_fill_value': | |
| 60 #set $__start_fill = $fill_empty_columns['do_fill_empty_columns']['fill_value'].value | |
| 61 #else: | |
| 62 #set $__start_fill = "" | |
| 63 #end if | |
| 64 #set $__fill_options['file1_columns'] = [ __start_fill for i in range( int( $input1.metadata.columns ) ) ] | |
| 65 #set $__fill_options['file2_columns'] = [ __start_fill for i in range( int( $input2.metadata.columns ) ) ] | |
| 66 #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'fill_value_by_column': | |
| 67 #for column_fill1 in $fill_empty_columns['do_fill_empty_columns']['column_fill1']: | |
| 68 #set $__fill_options['file1_columns'][ int( column_fill1['column_number1'].value ) - 1 ] = column_fill1['fill_value1'].value | |
| 69 #end for | |
| 70 #for column_fill2 in $fill_empty_columns['do_fill_empty_columns']['column_fill2']: | |
| 71 #set $__fill_options['file2_columns'][ int( column_fill2['column_number2'].value ) - 1 ] = column_fill2['fill_value2'].value | |
| 72 #end for | |
| 73 #end if | |
| 74 #end if | |
| 75 ${simplejson.dumps( __fill_options )} | |
| 76 </configfile> | |
| 77 </configfiles> | |
| 78 <outputs> | |
| 79 <data format="input" name="out_file1" metadata_source="input1" /> | |
| 80 </outputs> | |
| 81 <tests> | |
| 82 <test> | |
| 83 <param name="input1" value="1.bed"/> | |
| 84 <param name="input2" value="2.bed"/> | |
| 85 <param name="field1" value="2"/> | |
| 86 <param name="field2" value="2"/> | |
| 87 <param name="unmatched" value=""/> | |
| 88 <param name="partial" value=""/> | |
| 89 <param name="fill_empty_columns_switch" value="no_fill"/> | |
| 90 <output name="out_file1" file="joiner_out1.bed"/> | |
| 91 </test> | |
| 92 <test> | |
| 93 <param name="input1" value="1.bed"/> | |
| 94 <param name="input2" value="2.bed"/> | |
| 95 <param name="field1" value="2"/> | |
| 96 <param name="field2" value="2"/> | |
| 97 <param name="unmatched" value="Yes"/> | |
| 98 <param name="partial" value="Yes"/> | |
| 99 <param name="fill_empty_columns_switch" value="no_fill"/> | |
| 100 <output name="out_file1" file="joiner_out2.bed"/> | |
| 101 </test> | |
| 102 <test> | |
| 103 <param name="input1" value="1.bed"/> | |
| 104 <param name="input2" value="2.bed"/> | |
| 105 <param name="field1" value="2"/> | |
| 106 <param name="field2" value="2"/> | |
| 107 <param name="unmatched" value="Yes"/> | |
| 108 <param name="partial" value="Yes"/> | |
| 109 <param name="fill_empty_columns_switch" value="fill_empty"/> | |
| 110 <param name="fill_columns_by" value="fill_all"/> | |
| 111 <param name="column_fill_type" value="single_fill_value"/> | |
| 112 <param name="fill_value" value="~"/> | |
| 113 <output name="out_file1" file="joiner_out3.bed"/> | |
| 114 </test> | |
| 115 <test> | |
| 116 <param name="input1" value="1.bed"/> | |
| 117 <param name="input2" value="2.bed"/> | |
| 118 <param name="field1" value="2"/> | |
| 119 <param name="field2" value="2"/> | |
| 120 <param name="unmatched" value="Yes"/> | |
| 121 <param name="partial" value="Yes"/> | |
| 122 <param name="fill_empty_columns_switch" value="fill_empty"/> | |
| 123 <param name="fill_columns_by" value="fill_all"/> | |
| 124 <param name="column_fill_type" value="fill_value_by_column"/> | |
| 125 <param name="column_number1" value="6"/> | |
| 126 <param name="fill_value1" value="+"/> | |
| 127 <param name="column_number2" value="1"/> | |
| 128 <param name="fill_value2" value="NoChrom"/> | |
| 129 <output name="out_file1" file="joiner_out4.bed"/> | |
| 130 </test> | |
| 131 </tests> | |
| 132 <help> | |
| 133 | |
| 134 .. class:: warningmark | |
| 135 | |
| 136 **This tool will attempt to reuse the metadata from your first input.** To change metadata assignments click on the "edit attributes" link of the history item generated by this tool. | |
| 137 | |
| 138 .. class:: infomark | |
| 139 | |
| 140 **TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert* | |
| 141 | |
| 142 ----- | |
| 143 | |
| 144 **Syntax** | |
| 145 | |
| 146 This tool joins lines of two datasets on a common field. An empty string ("") is not a valid identifier. | |
| 147 You may choose to include lines of your first input that do not join with your second input. | |
| 148 | |
| 149 - Columns are referenced with a **number**. For example, **3** refers to the 3rd column of a tab-delimited file. | |
| 150 | |
| 151 ----- | |
| 152 | |
| 153 **Example** | |
| 154 | |
| 155 Dataset1:: | |
| 156 | |
| 157 chr1 10 20 geneA | |
| 158 chr1 50 80 geneB | |
| 159 chr5 10 40 geneL | |
| 160 | |
| 161 Dataset2:: | |
| 162 | |
| 163 geneA tumor-suppressor | |
| 164 geneB Foxp2 | |
| 165 geneC Gnas1 | |
| 166 geneE INK4a | |
| 167 | |
| 168 Joining the 4th column of Dataset1 with the 1st column of Dataset2 will yield:: | |
| 169 | |
| 170 chr1 10 20 geneA geneA tumor-suppressor | |
| 171 chr1 50 80 geneB geneB Foxp2 | |
| 172 | |
| 173 Joining the 4th column of Dataset1 with the 1st column of Dataset2, while keeping all lines from Dataset1, will yield:: | |
| 174 | |
| 175 chr1 10 20 geneA geneA tumor-suppressor | |
| 176 chr1 50 80 geneB geneB Foxp2 | |
| 177 chr5 10 40 geneL | |
| 178 | |
| 179 </help> | |
| 180 </tool> |
