Mercurial > repos > devteam > column_maker
comparison column_maker.xml @ 9:33b81f9ea109 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/column_maker commit fe76077775aaca531f6a563fdfcbd73fbf1528e7
author | iuc |
---|---|
date | Thu, 28 Jul 2022 15:27:54 +0000 |
parents | 227e82286a0e |
children | beec6ecc7d3c |
comparison
equal
deleted
inserted
replaced
8:227e82286a0e | 9:33b81f9ea109 |
---|---|
1 <tool id="Add_a_column1" name="Compute" version="1.6"> | 1 <tool id="Add_a_column1" name="Compute" version="2.0"> |
2 <description>an expression on every row</description> | 2 <description>on rows</description> |
3 <macros> | |
4 <xml name="compute_repeat"> | |
5 <repeat name="expressions" title="Expressions" min="1" default="1"> | |
6 <param name="cond" type="text" value="c3-c2" label="Add expression"> | |
7 <sanitizer> | |
8 <valid initial="default"> | |
9 <add value="&lt;" /> | |
10 <add value="&gt;" /> | |
11 <add value="&quot;" /> | |
12 <add value="&apos;" /> | |
13 </valid> | |
14 </sanitizer> | |
15 </param> | |
16 <conditional name="add_column"> | |
17 <param name="mode" type="select" label="Mode of the operation"> | |
18 <option value="">Append</option> | |
19 <option value="I">Insert</option> | |
20 <option value="R">Replace</option> | |
21 </param> | |
22 <when value=""> | |
23 <param name="pos" type="hidden" value="" /> | |
24 </when> | |
25 <when value="I"> | |
26 <param name="pos" type="integer" min="1" value="1" label="Insert new column before existing column number" /> | |
27 </when> | |
28 <when value="R"> | |
29 <param name="pos" type="integer" min="1" value="1" label="Use new column to replace column number" /> | |
30 </when> | |
31 </conditional> | |
32 <yield /> | |
33 </repeat> | |
34 </xml> | |
35 </macros> | |
3 <requirements> | 36 <requirements> |
4 <requirement type="package" version="3.8">python</requirement> | 37 <requirement type="package" version="3.8">python</requirement> |
5 <requirement type="package" version="1.19.1">numpy</requirement> | 38 <requirement type="package" version="1.23.1">numpy</requirement> |
6 </requirements> | 39 </requirements> |
7 <command detect_errors="aggressive"><![CDATA[ | 40 <command detect_errors="aggressive"><![CDATA[ |
8 ln -s '$input' data && | 41 python '$__tool_directory__/column_maker.py' |
9 | 42 #if str($error_handling.auto_col_types) == 'on': |
10 ## inject colums and column_types metadata into inputs json | 43 #set $col_types = $input.metadata.column_types |
11 #import json | 44 #else: |
12 #set inputs_dict = json.load(open($inputs)) | 45 #set $col_types = ','.join(['str' for t in $input.metadata.column_types.split(',')]) |
13 #set inputs_dict['columns'] = $input.metadata.columns | 46 #end if |
14 #set inputs_dict['column_types'] = $input.metadata.column_types | 47 --column-types $col_types |
15 ## flatten conditional | 48 $avoid_scientific_notation |
16 #if $header_lines_conditional.header_lines_select == "yes": | 49 #if str($ops.header_lines_select) == 'yes': |
17 #set inputs_dict['header_new_column_name'] = str($header_lines_conditional.header_new_column_name) | 50 --header |
18 #end if | 51 #end if |
19 #set x = json.dump($inputs_dict, open($inputs, 'w')) | 52 --file '$expressions_file' |
20 | 53 $error_handling.fail_on_non_existent_columns |
21 python '$__tool_directory__/column_maker.py' | 54 $error_handling.non_computable.action |
22 data '$out_file1' | 55 #if str($error_handling.non_computable.action) == '--non-computable-default': |
23 --load_json '$inputs' | 56 '$error_handling.non_computable.default_value' |
57 #end if | |
58 '$input' | |
59 '$out_file1' | |
24 ]]></command> | 60 ]]></command> |
25 <configfiles> | 61 <configfiles> |
26 <inputs name="inputs"/> | 62 <configfile name="expressions_file"><![CDATA[ |
63 #if str($ops.header_lines_select) == 'yes': | |
64 #for $expr in $ops.expressions: | |
65 ${expr.cond};${expr.add_column.pos}${expr.add_column.mode};${expr.new_column_name} | |
66 #end for | |
67 #else: | |
68 #for $expr in $ops.expressions: | |
69 ${expr.cond};${expr.add_column.pos}${expr.add_column.mode}; | |
70 #end for | |
71 #end if | |
72 ]]></configfile> | |
27 </configfiles> | 73 </configfiles> |
28 <inputs> | 74 <inputs> |
29 <param name="cond" type="text" value="c3-c2" label="Add expression"> | 75 <param name="input" type="data" format="tabular" label="Input file" help="Dataset missing? See TIP below" /> |
30 <sanitizer> | 76 <conditional name="ops"> |
31 <valid initial="default"> | |
32 <add value="&lt;" /> | |
33 <add value="&gt;" /> | |
34 <add value="&quot;" /> | |
35 <add value="&apos;" /> | |
36 </valid> | |
37 </sanitizer> | |
38 </param> | |
39 <param format="tabular" name="input" type="data" label="as a new column to" help="Dataset missing? See TIP below"/> | |
40 <param name="round" type="boolean" truevalue="yes" falsevalue="no" label="Round result?" /> | |
41 <param name="avoid_scientific_notation" type="boolean" truevalue="yes" falsevalue="no" | |
42 label="Avoid scientific notation" | |
43 help="If yes, use fully expanded decimal representation when writing new columns (use only if expression produces decimal numbers)." /> | |
44 <conditional name="header_lines_conditional"> | |
45 <param name="header_lines_select" type="select" | 77 <param name="header_lines_select" type="select" |
46 label="Input has a header line with column names?" | 78 label="Input has a header line with column names?" |
47 help="Select Yes to be able to specify a name for the new column and have it added to the header line. If you select No, the first line will be treated as a regular line: If it is empty or starts with a # character it will be skipped, otherwise the tool will attempt to compute the specified expression on it." > | 79 help="Select Yes to be able to specify names for new columns and have them added to the header line. If you select No, the first line will be treated as a regular line: If it is empty or starts with a # character it will be skipped, otherwise the tool will attempt to compute the specified expression on it." > |
48 <option value="no" >No</option> | 80 <option value="no">No</option> |
49 <option value="yes" >Yes</option> | 81 <option value="yes">Yes</option> |
50 </param> | 82 </param> |
51 <when value="no"> | 83 <when value="no"> |
84 <expand macro="compute_repeat" /> | |
52 </when> | 85 </when> |
53 <when value="yes"> | 86 <when value="yes"> |
54 <param name="header_new_column_name" type="text" value="New Column" label="The new column name" /> | 87 <expand macro="compute_repeat"> |
88 <param name="new_column_name" type="text" value="New Column" label="The new column name" /> | |
89 </expand> | |
55 </when> | 90 </when> |
56 </conditional> | 91 </conditional> |
92 <param name="avoid_scientific_notation" type="boolean" truevalue="--avoid-scientific-notation" falsevalue="" | |
93 label="Avoid scientific notation in any newly computed columns" | |
94 help="If yes, use fully expanded decimal representation when writing new columns with floating point values. To prevent scientific notation in just specific new columns, you can use numpy's format_float_positional function in the corresponding expression." /> | |
95 <section name="error_handling" title="Error handling"> | |
96 <param name="auto_col_types" type="boolean" truevalue="on" falsevalue="off" checked="true" label="Autodetect column types" | |
97 help="By default, try to use the column types that Galaxy has recorded for the input. This simplifies expressions, but can occasionally cause problems on its own. If disabled all column values are assumed to be strings and you will have to handle conversions to different types explicitly in the expression." /> | |
98 <param argument="--fail-on-non-existent-columns" type="boolean" truevalue="--fail-on-non-existent-columns" falsevalue="" checked="true" label="Fail on references to non-existent columns" | |
99 help="If any expression references a column number that does not exist when that expression gets computed, the tool run will fail. Uncheck to have such a situation handled as a case of a non-computable expression as configured below." /> | |
100 <conditional name="non_computable"> | |
101 <param name="action" type="select" label="If an expression cannot be computed for a row"> | |
102 <option value="--fail-on-non-computable">Fail the entire tool run</option> | |
103 <option value="--skip-non-computable">Skip the row</option> | |
104 <option value="--keep-non-computable">Keep the row unchanged</option> | |
105 <option value="--non-computable-blank">Produce an empty column value for the row</option> | |
106 <option value="--non-computable-default">Fill in a replacement value</option> | |
107 </param> | |
108 <when value="--fail-on-non-computable" /> | |
109 <when value="--skip-non-computable" /> | |
110 <when value="--keep-non-computable" /> | |
111 <when value="--non-computable-blank" /> | |
112 <when value="--non-computable-default"> | |
113 <param name="default_value" type="text" label="Replacement value" help="Pick from suggestions or enter your own."> | |
114 <option value="nan">nan (not a number)</option> | |
115 <option value="inf">inf (infinity)</option> | |
116 <option value="-inf">-inf (negative infinity)</option> | |
117 <option value="NA">NA (not available)</option> | |
118 <option value=".">.</option> | |
119 </param> | |
120 </when> | |
121 </conditional> | |
122 </section> | |
57 </inputs> | 123 </inputs> |
58 <outputs> | 124 <outputs> |
59 <data format_source="input" name="out_file1" metadata_source="input"/> | 125 <data name="out_file1" format_source="input" metadata_source="input"/> |
60 </outputs> | 126 </outputs> |
61 <tests> | 127 <tests> |
62 <test> | 128 <test> |
63 <param name="cond" value="c3-c2"/> | 129 <param name="cond" value="float(c3-c2)"/> |
64 <param name="input" value="1.bed"/> | 130 <param name="input" value="1.bed" ftype="bed" /> |
65 <param name="round" value="false"/> | |
66 <output name="out_file1" file="column_maker_out1.interval"/> | 131 <output name="out_file1" file="column_maker_out1.interval"/> |
67 </test> | 132 </test> |
68 <test> | 133 <test> |
69 <param name="cond" value="c4*1"/> | 134 <param name="cond" value="c4*1."/> |
135 <param name="input" value="1.interval" ftype="interval" /> | |
136 <output name="out_file1" file="column_maker_out2.interval"/> | |
137 </test> | |
138 <test> | |
139 <param name="cond" value="c4*1."/> | |
140 <param name="input" value="1.header.tsv" ftype="tabular" /> | |
141 <param name="header_lines_select" value="yes" /> | |
142 <param name="new_column_name" value="value1_again" /> | |
143 <output name="out_file1" file="column_maker_out2.header.tsv"/> | |
144 </test> | |
145 <test> | |
146 <param name="cond" value="round(c4*1)"/> | |
70 <param name="input" value="1.interval"/> | 147 <param name="input" value="1.interval"/> |
71 <param name="round" value="false"/> | |
72 <output name="out_file1" file="column_maker_out2.interval"/> | |
73 </test> | |
74 <test> | |
75 <param name="cond" value="c4*1"/> | |
76 <param name="input" value="1.header.tsv"/> | |
77 <param name="round" value="false"/> | |
78 <conditional name="header_lines_conditional"> | |
79 <param name="header_lines_select" value="yes" /> | |
80 <param name="header_new_column_name" value="value1_again" /> | |
81 </conditional> | |
82 <output name="out_file1" file="column_maker_out2.header.tsv"/> | |
83 </test> | |
84 <test> | |
85 <param name="cond" value="c4*1"/> | |
86 <param name="input" value="1.interval"/> | |
87 <param name="round" value="true"/> | |
88 <output name="out_file1" file="column_maker_out3.interval"/> | 148 <output name="out_file1" file="column_maker_out3.interval"/> |
89 </test> | 149 </test> |
90 <test> | 150 <test> |
91 <!-- test that single column input works --> | 151 <!-- test that single column input works --> |
92 <param name="cond" value="c1/10"/> | 152 <param name="cond" value="c1/10"/> |
93 <param name="input" value="1.tab" ftype="tabular"/> | 153 <param name="input" value="1.tab" ftype="tabular" /> |
94 <param name="round" value="no"/> | |
95 <output name="out_file1" file="column_maker_out4.tab"/> | 154 <output name="out_file1" file="column_maker_out4.tab"/> |
96 </test> | 155 </test> |
97 <test> | 156 <test> |
98 <param name="cond" value="float(.0000000000001)"/> | 157 <param name="cond" value="float(.0000000000001)"/> |
99 <param name="input" value="1.bed"/> | 158 <param name="input" value="1.bed"/> |
100 <param name="round" value="false"/> | |
101 <output name="out_file1"> | 159 <output name="out_file1"> |
102 <assert_contents> | 160 <assert_contents> |
103 <has_text text="CCDS10397" /> | 161 <has_text text="CCDS10397" /> |
104 <has_text text="1e-13" /> | 162 <has_text text="1e-13" /> |
105 </assert_contents> | 163 </assert_contents> |
106 </output> | 164 </output> |
107 </test> | 165 </test> |
108 <test> | 166 <test> |
109 <param name="cond" value="float(.0000000000001)"/> | 167 <param name="cond" value="float(.0000000000001)"/> |
110 <param name="input" value="1.bed"/> | 168 <param name="input" value="1.bed" ftype="bed" /> |
111 <param name="round" value="false"/> | |
112 <param name="avoid_scientific_notation" value="true"/> | 169 <param name="avoid_scientific_notation" value="true"/> |
113 <output name="out_file1"> | 170 <output name="out_file1"> |
114 <assert_contents> | 171 <assert_contents> |
115 <has_text text="CCDS10397" /> | 172 <has_text text="CCDS10397" /> |
116 <has_text text=".0000000000001" /> | 173 <has_text text=".0000000000001" /> |
117 <not_has_text text="1e-13" /> | 174 <not_has_text text="1e-13" /> |
118 </assert_contents> | 175 </assert_contents> |
119 </output> | 176 </output> |
120 </test> | 177 </test> |
178 <test> | |
179 <param name="input" value="1.tab" ftype="tabular" /> | |
180 <repeat name="expressions"> | |
181 <param name="cond" value="c1/10" /> | |
182 <conditional name="add_column"> | |
183 <param name="mode" value="R" /> | |
184 <param name="pos" value="1" /> | |
185 </conditional> | |
186 </repeat> | |
187 <repeat name="expressions"> | |
188 <param name="cond" value="round(c1*10)" /> | |
189 <conditional name="add_column"> | |
190 <param name="mode" value="I" /> | |
191 <param name="pos" value="1" /> | |
192 </conditional> | |
193 </repeat> | |
194 <output name="out_file1" file="column_maker_out4.tab" /> | |
195 </test> | |
196 <!-- Test list column type in input --> | |
197 <test> | |
198 <param name="input" value="bed12.bed" ftype="bed12" /> | |
199 <!-- get largest blocksize from column 11 of bed12 and use it as | |
200 new score value --> | |
201 <param name="cond" value="max(map(int, c11))" /> | |
202 <conditional name="add_column"> | |
203 <param name="mode" value="R" /> | |
204 <param name="pos" value="5" /> | |
205 </conditional> | |
206 <output name="out_file1" file="bed12_modified.bed" /> | |
207 </test> | |
208 <!-- Test error handling example from help section --> | |
209 <test> | |
210 <param name="input" value="short_line_test.tab" ftype="tabular" /> | |
211 <param name="cond" value="c6" /> | |
212 <conditional name="add_column"> | |
213 <param name="mode" value="R" /> | |
214 <param name="pos" value="6" /> | |
215 </conditional> | |
216 <param name="fail_on_non_existent_columns" value="false" /> | |
217 <param name="action" value="--non-computable-default" /> | |
218 <param name="default_value" value="." /> | |
219 <output name="out_file1" file="short_line_test_out.tab" /> | |
220 </test> | |
221 <!-- Test athletes BMI calculation in presence of NA values as in | |
222 https://training.galaxyproject.org/training-material/topics/introduction/tutorials/data-manipulation-olympics/tutorial.html#exercises-4 | |
223 --> | |
224 <test> | |
225 <param name="input" value="olympics.tsv" ftype="tabular" /> | |
226 <param name="header_lines_select" value="yes" /> | |
227 <param name="new_column_name" value="BMI" /> | |
228 <param name="cond" value="int(c8) / (int(c7) * int(c7)) * 10000" /> | |
229 <param name="auto_col_types" value="false" /> | |
230 <param name="action" value="--non-computable-default" /> | |
231 <param name="default_value" value="NA" /> | |
232 <output name="out_file1" file="olympics_bmi_out.tab" /> | |
233 </test> | |
234 <!-- Test operation used by iwc SARS-CoV-2 consensus building WF that | |
235 turns a 3-column CHROM POS REF tabular dataset into a 3-column BED | |
236 dataset. --> | |
237 <test> | |
238 <param name="input" value="chrom_pos_ref.tab" ftype="tabular" /> | |
239 <repeat name="expressions"> | |
240 <param name="cond" value="int(c2) - (len(c3) == 1)" /> | |
241 <conditional name="add_column"> | |
242 <param name="mode" value="R" /> | |
243 <param name="pos" value="2" /> | |
244 </conditional> | |
245 </repeat> | |
246 <repeat name="expressions"> | |
247 <param name="cond" value="int(c2) + ((len(c3) - 1) or 1)" /> | |
248 <conditional name="add_column"> | |
249 <param name="mode" value="R" /> | |
250 <param name="pos" value="3" /> | |
251 </conditional> | |
252 </repeat> | |
253 <output name="out_file1" file="bed_from_chrom_pos_ref.bed" /> | |
254 </test> | |
255 <!-- Test failure on expression syntax errors --> | |
256 <test expect_failure="true"> | |
257 <param name="cond" value="c3- = c2"/> | |
258 <param name="input" value="1.bed" ftype="bed" /> | |
259 <assert_stderr> | |
260 <has_text text="syntax error during parsing." /> | |
261 </assert_stderr> | |
262 </test> | |
263 <!-- Test failure on expression NameErrors --> | |
264 <test expect_failure="true"> | |
265 <param name="cond" value="floatfloat(c3-c2)"/> | |
266 <param name="input" value="1.bed" ftype="bed" /> | |
267 <assert_stderr> | |
268 <has_text text="name 'floatfloat' is not defined" /> | |
269 </assert_stderr> | |
270 </test> | |
271 <!-- Test failure on non-existent column ref --> | |
272 <test expect_failure="true"> | |
273 <param name="cond" value="c7 - c2"/> | |
274 <param name="input" value="1.bed" ftype="bed" /> | |
275 <assert_stderr> | |
276 <has_text text="name 'c7' is not defined" /> | |
277 </assert_stderr> | |
278 </test> | |
279 <!-- Test failure on non-computable expression --> | |
280 <test expect_failure="true"> | |
281 <param name="cond" value="c3 / 0"/> | |
282 <param name="input" value="1.bed" ftype="bed" /> | |
283 <assert_stderr> | |
284 <has_text text="division by zero" /> | |
285 </assert_stderr> | |
286 </test> | |
287 <!-- Test keep-non-computable prevents failure --> | |
288 <test> | |
289 <param name="cond" value="c3 / 0"/> | |
290 <param name="input" value="1.bed" ftype="bed" /> | |
291 <param name="action" value="--keep-non-computable" /> | |
292 <output name="out_file1" file="1.bed" /> | |
293 </test> | |
121 </tests> | 294 </tests> |
122 <help><![CDATA[ | 295 <help><![CDATA[ |
123 | |
124 .. class:: infomark | 296 .. class:: infomark |
125 | 297 |
126 **TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert* | 298 **TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert* |
127 | 299 |
128 ----- | 300 ----- |
129 | 301 |
130 **What it does** | 302 **What it does** |
131 | 303 |
132 This tool computes an expression for every row of a dataset and appends the result as a new column (field). | 304 This tool computes an expression on every row of a dataset and appends or inserts the result as a new column (field). |
305 | |
306 Several expressions can be specified and will be applied sequentially to each row. | |
307 | |
308 **Expression rules** | |
133 | 309 |
134 - Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file | 310 - Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file |
135 | 311 |
136 - **c3-c2** will add a length column to the dataset if **c2** and **c3** are start and end position | 312 - The following built-in Python functions are available for use in expressions:: |
313 | |
314 abs | all | any | ascii | bin | bool | chr | ceil | complex | divmod | |
315 | |
316 exp | float | floor | format | hex | int | len | list | log | log10 | |
317 | |
318 map | max | min | oct | ord | pow | range | reversed | |
319 | |
320 round | set | sorted | sqrt | str | sum | type | |
321 | |
322 - In addition the numpy function ``format_float_positional`` is available to | |
323 control the formatting of floating point numbers. | |
324 | |
325 - Expressions can be chained, and the tool will keep track of newly added | |
326 columns while working through the chain. This means you can reference a column | |
327 that was created as the result of a previous expression in later ones. | |
137 | 328 |
138 ----- | 329 ----- |
139 | 330 |
140 **Example** | 331 **Simple examples** |
141 | 332 |
142 If this is your input:: | 333 If this is your input:: |
143 | 334 |
144 chr1 151077881 151077918 2 200 - | 335 chr1 151077881 151077918 2 200 - |
145 chr1 151081985 151082078 3 500 + | 336 chr1 151081985 151082078 3 500 + |
146 | 337 |
147 computing "c4*c5" will produce:: | 338 computing "c4 * c5" will produce:: |
148 | |
149 chr1 151077881 151077918 2 200 - 400.0 | |
150 chr1 151081985 151082078 3 500 + 1500.0 | |
151 | |
152 if, at the same time, "Round result?" is set to **YES** results will look like this:: | |
153 | 339 |
154 chr1 151077881 151077918 2 200 - 400 | 340 chr1 151077881 151077918 2 200 - 400 |
155 chr1 151081985 151082078 3 500 + 1500 | 341 chr1 151081985 151082078 3 500 + 1500 |
156 | 342 |
157 You can also use this tool to evaluate expressions. For example, computing "c3>=c2" for Input will result in the following:: | 343 You can also use this tool to evaluate expressions. |
344 For example, computing "c3 >= c2" for the input above will result in the following:: | |
158 | 345 |
159 chr1 151077881 151077918 2 200 - True | 346 chr1 151077881 151077918 2 200 - True |
160 chr1 151081985 151082078 3 500 + True | 347 chr1 151081985 151082078 3 500 + True |
161 | 348 |
162 or computing "type(c2)==type('')" for Input will return:: | 349 Similarly, computing "type(c2) == type(c3)" will return:: |
163 | 350 |
164 chr1 151077881 151077918 2 200 - False | 351 chr1 151077881 151077918 2 200 - True |
165 chr1 151081985 151082078 3 500 + False | 352 chr1 151081985 151082078 3 500 + True |
166 | 353 |
167 | 354 ----- |
168 The following built-in functions are available:: | 355 |
169 | 356 **Error handling** |
170 abs | all | any | bin | bool | chr | ceil | cmp | complex | 357 |
171 | 358 The tool will always fail on syntax errors and other unrecoverable parsing |
172 divmod | exp | float | log | log10 | floor | hex | int | len | long | 359 errors with any of your expressions. For other problems, however, it offers |
173 | 360 control over how they should be handled: |
174 max | min | oct | ord | pow | range | reversed | 361 |
175 | 362 1. The default for "Autodetect column types" is "Yes", which means the tool |
176 round | sorted | sqrt | str | sum | type | unichr | unicode | | 363 will evaluate each column value as the type that Galaxy assumes for the |
177 | 364 column. This default behavior will allow you to write simpler expressions. |
365 The arithmetic expression "c4 * c5" from the first simple example, | |
366 for instance, works only because Galaxy realizes that c4 and c5 are integer | |
367 columns. Occasionally, this autodetection can cause issues. A common | |
368 such situation is missing values in columns that Galaxy thinks are of | |
369 numeric type. If you're getting errors like "Failed to convert some of the | |
370 columns in line #X ...", a solution might be to turn off column type | |
371 autodetection. The price you will have to pay for doing so is that now you | |
372 will have to handle type conversions yourself. In the first example you would | |
373 now have to use the expression: "int(c4) * int(c5)". | |
374 | |
375 2. By default, if any expression references columns that are not existing before | |
376 that expression gets computed, the tool will fail, but you can uncheck the | |
377 "Fail on references to non-existent columns" option. If you do so, the result | |
378 will depend on your choice for "If an expression cannot be computed for a row" | |
379 (see 3.) | |
380 | |
381 3. The default for rows for which an expression fails to compute is, again, to | |
382 fail the tool run, but you can also choose to: | |
383 | |
384 - skip the row on output | |
385 | |
386 This is a simple way to only keep lines conforming to an expected standard. | |
387 It is also easy to mask problems with your expressions with this option so | |
388 take a look at the results and try to understand what gets skipped and for | |
389 what reasons (the stdout of the tool will contain information about both). | |
390 | |
391 - keep the row unchanged | |
392 | |
393 This can be a good solution if your input contains special separator lines | |
394 that don't follow the general tabular format of other lines and you would | |
395 like to keep those lines | |
396 | |
397 - produce an empty column value for the row | |
398 | |
399 This will use the empty string as a substitute for non-computable items. | |
400 Different from the "keep the row unchanged" option, the problematic line will | |
401 have a column added or changed. This option is a good choice for inputs | |
402 in which all rows have the same tabular layout where you want to make sure | |
403 that the same is true for the output, i.e. that all output lines still have | |
404 the same number of columns. | |
405 | |
406 - fill in a replacement value | |
407 | |
408 This option is very similar to the previous one, but lets you control the | |
409 replacement value. | |
410 | |
411 **Example** | |
412 | |
413 In the following input:: | |
414 | |
415 chr1 151077881 151077918 2 200 - | |
416 chr1 151081985 151082078 3 500 + | |
417 chr1 151090031 151090938 4 700 | |
418 | |
419 the last line does not have a strand column. This violates the bed file format | |
420 specification, which says that unknown strand is to be encoded as ``.`` in the | |
421 strand column. | |
422 | |
423 You can fix the file with the following tool run: | |
424 | |
425 **Add expression**: `c6` | |
426 | |
427 **Mode of the operation**: `Replace` | |
428 | |
429 **Use new column to replace column number**: `6` | |
430 | |
431 **Fail on references to non-existent columns**: `No` | |
432 | |
433 **If an expression cannot be computed for a row**: `Fill in a replacement value` | |
434 | |
435 **Replacement value**: `.` | |
178 ]]></help> | 436 ]]></help> |
179 <citations /> | 437 <citations /> |
180 </tool> | 438 </tool> |