Mercurial > repos > devteam > column_maker

<tool id="Add_a_column1" name="Compute" version="1.4">
    <description>an expression on every row</description>
    <!--
        Packages resolved for the job environment.
        python runs column_maker.py and sed is used by the command block to
        split off the optional header line.
        NOTE(review): numpy is presumably imported by column_maker.py, which is
        not part of this file; confirm before changing the pin.
    -->
    <requirements>
        <requirement type="package" version="3.8">python</requirement>
        <requirement type="package" version="4.4">sed</requirement>
        <requirement type="package" version="1.19.1">numpy</requirement>
    </requirements>
    <!--
        Job command (Cheetah template rendered into a shell command line).

        Steps:
          1. Split the input into "header" and "data" files in the working
             directory. With header handling enabled, line 1 of the input plus
             a tab and the new column name becomes "header" and the remaining
             lines become "data"; otherwise "header" is empty and "data" is a
             symlink to the input.
          2. At template-render time, add the input's "columns" and
             "column_types" metadata to the inputs JSON config file.
          3. Run column_maker.py on "data", then concatenate "header" and the
             script output into the output dataset.
    -->
    <command detect_errors="aggressive"><![CDATA[
        #if $header_lines_conditional.header_lines_select == "yes":
            ## Append <TAB><new column name> to the first line. GNU sed expands \t
            ## in the replacement to a tab, so no placeholder character has to be
            ## translated afterwards and any % already present in the header
            ## line is left untouched.
            sed -n '1p' '$input' | sed "s|$|\t${header_lines_conditional.header_new_column_name}|" > header &&
            sed '1d' '$input' > data &&
        #else:
            touch header &&
            ln -s '$input' data &&
        #end if

        ## Inject the columns and column_types metadata into the inputs JSON.
        ## File handles are closed explicitly so the rewritten file is fully
        ## flushed before column_maker.py reads it.
        #import json
        #set inputs_handle = open($inputs)
        #set inputs_dict = json.load($inputs_handle)
        #silent $inputs_handle.close()
        #set inputs_dict['columns'] = $input.metadata.columns
        #set inputs_dict['column_types'] = $input.metadata.column_types
        #set inputs_handle = open($inputs, 'w')
        #silent json.dump($inputs_dict, $inputs_handle)
        #silent $inputs_handle.close()

        python '$__tool_directory__/column_maker.py'
             data column_maker_output
             --load_json '$inputs'
        && cat header column_maker_output > '$out_file1'
    ]]></command>
    <!--
        Config file named "inputs". The command block loads it as JSON, adds
        the input's column metadata to it, and hands its path to
        column_maker.py through the load_json option.
    -->
    <configfiles>
      <inputs name="inputs"/>
    </configfiles>
    <inputs>
        <!-- Expression computed for every row; columns are referenced as c1, c2, ... -->
        <param name="cond" type="text" label="Add expression" value="c3-c2"/>
        <!-- Tabular dataset that receives the new column -->
        <param name="input" type="data" format="tabular"
               label="as a new column to"
               help="Dataset missing? See TIP below"/>
        <param name="round" type="select" label="Round result?">
            <option value="no">NO</option>
            <option value="yes">YES</option>
        </param>
        <!-- Optional header handling: when enabled, the user names the new column -->
        <conditional name="header_lines_conditional">
            <param name="header_lines_select" type="select"
                   label="Skip a header line"
                   help="# characters are already considered as comments and kept">
                <option value="no">no</option>
                <option value="yes">yes</option>
            </param>
            <when value="no"/>
            <when value="yes">
                <param name="header_new_column_name" type="text" label="The new column name" value="New Column"/>
            </when>
        </conditional>
        <param name="avoid_scientific_notation" type="select"
               label="Avoid scientific notation"
               help="If yes, use fully expanded decimal representation when writing new columns (use only if expression produces decimal numbers).">
            <option value="no">no</option>
            <option value="yes">yes</option>
        </param>
    </inputs>
    <!-- Single output dataset; its format and metadata are taken from the input dataset. -->
    <outputs>
        <data name="out_file1" format_source="input" metadata_source="input"/>
    </outputs>
    <tests>
        <!-- Difference of two columns (c3-c2) on 1.bed, no rounding -->
        <test>
            <param name="cond" value="c3-c2"/>
            <param name="input" value="1.bed"/>
            <param name="round" value="no"/>
            <output name="out_file1" file="column_maker_out1.interval"/>
        </test>
        <!-- Product expression (c4*1) on 1.interval, no rounding -->
        <test>
            <param name="cond" value="c4*1"/>
            <param name="input" value="1.interval"/>
            <param name="round" value="no"/>
            <output name="out_file1" file="column_maker_out2.interval"/>
        </test>
        <!-- Same expression with header handling enabled and a custom new column name -->
        <test>
            <param name="cond" value="c4*1"/>
            <param name="input" value="1.header.tsv"/>
            <param name="round" value="no"/>
            <conditional name="header_lines_conditional">
                <param name="header_lines_select" value="yes" />
                <param name="header_new_column_name" value="value1_again" />
            </conditional>
            <output name="out_file1" file="column_maker_out2.header.tsv"/>
        </test>
        <!-- Same expression with rounding switched on -->
        <test>
            <param name="cond" value="c4*1"/>
            <param name="input" value="1.interval"/>
            <param name="round" value="yes"/>
            <output name="out_file1" file="column_maker_out3.interval"/>
        </test>
        <!-- Very small float without setting avoid_scientific_notation: output must contain 1e-13 -->
        <test>
            <param name="cond" value="float(.0000000000001)"/>
            <param name="input" value="1.bed"/>
            <param name="round" value="no"/>
            <output name="out_file1">
                <assert_contents>
                    <has_text text="CCDS10397" />
                    <has_text text="1e-13" />
                </assert_contents>
            </output>
        </test>
        <!-- Same value with avoid_scientific_notation=yes: expanded decimal expected, 1e-13 must be absent -->
        <test>
            <param name="cond" value="float(.0000000000001)"/>
            <param name="input" value="1.bed"/>
            <param name="round" value="no"/>
            <param name="avoid_scientific_notation" value="yes"/>
            <output name="out_file1">
                <assert_contents>
                    <has_text text="CCDS10397" />
                    <has_text text=".0000000000001" />
                    <not_has_text text="1e-13" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>

.. class:: infomark

**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*

-----

**What it does**

This tool computes an expression for every row of a dataset and appends the result as a new column (field).

- Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file

- **c3-c2** will add a length column to the dataset if **c2** and **c3** are the start and end positions

-----

**Example**

If this is your input::

   chr1  151077881  151077918  2  200  -
   chr1  151081985  151082078  3  500  +

computing "c4*c5" will produce::

   chr1  151077881  151077918  2  200  -   400.0
   chr1  151081985  151082078  3  500  +  1500.0

if, at the same time, "Round result?" is set to **YES** results will look like this::

   chr1  151077881  151077918  2  200  -   400
   chr1  151081985  151082078  3  500  +  1500

You can also use this tool to evaluate expressions. For example, computing "c3>=c2" for the input above will result in the following::

   chr1  151077881  151077918  2  200  -  True
   chr1  151081985  151082078  3  500  +  True

or computing "type(c2)==type('')" for the input above will return::

   chr1  151077881  151077918  2  200  -  False
   chr1  151081985  151082078  3  500  +  False


The following built-in functions are available::

  abs | all | any | bin | bool | chr | ceil | cmp | complex

  divmod | exp | float | log | log10 | floor | hex | int | len | long

  max | min | oct | ord | pow | range | reversed

  round | sorted | sqrt | str | sum | type | unichr | unicode

    </help>
    <citations />
</tool>
author	devteam
date	Wed, 30 Dec 2020 00:49:52 +0000
parents	c6fdc1118036
children	e7c273e8d4d6