view column_maker.xml @ 8:227e82286a0e draft

"planemo upload for repository commit c623e4b9f6dd6ff4b88d9386f3b0a0cb497e0a0e"
author devteam
date Wed, 24 Feb 2021 05:19:12 +0000
parents e7c273e8d4d6
children 33b81f9ea109
line wrap: on
line source

<tool id="Add_a_column1" name="Compute" version="1.6">
    <description>an expression on every row</description>
        <requirement type="package" version="3.8">python</requirement>
        <requirement type="package" version="1.19.1">numpy</requirement>
    <command detect_errors="aggressive"><![CDATA[
        ln -s '$input' data &&

        ## inject columns and column_types metadata into inputs json
        #import json
        #set inputs_dict = json.load(open($inputs))
        #set inputs_dict['columns'] = $input.metadata.columns
        #set inputs_dict['column_types'] = $input.metadata.column_types
        ## flatten conditional
        #if $header_lines_conditional.header_lines_select == "yes":
            #set inputs_dict['header_new_column_name'] = str($header_lines_conditional.header_new_column_name)
        #end if
        #set x = json.dump($inputs_dict, open($inputs, 'w'))

        python '$__tool_directory__/'
             data '$out_file1'
             --load_json '$inputs'
      <inputs name="inputs"/>
        <param name="cond" type="text" value="c3-c2" label="Add expression">
                <valid initial="default">
                    <add value="&lt;" />
                    <add value="&gt;" />
                    <add value="&quot;" />
                    <add value="&apos;" />
        <param format="tabular" name="input" type="data" label="as a new column to" help="Dataset missing? See TIP below"/>
        <param name="round" type="boolean" truevalue="yes" falsevalue="no" label="Round result?" />
        <param name="avoid_scientific_notation" type="boolean" truevalue="yes" falsevalue="no"
        label="Avoid scientific notation"
        help="If yes, use fully expanded decimal representation when writing new columns (use only if expression produces decimal numbers)." />
        <conditional name="header_lines_conditional">
            <param name="header_lines_select" type="select"
            label="Input has a header line with column names?"
            help="Select Yes to be able to specify a name for the new column and have it added to the header line. If you select No, the first line will be treated as a regular line: If it is empty or starts with a # character it will be skipped, otherwise the tool will attempt to compute the specified expression on it." >
                <option value="no" >No</option>
                <option value="yes" >Yes</option>
            <when value="no">
            <when value="yes">
                <param name="header_new_column_name" type="text" value="New Column" label="The new column name" />
        <data format_source="input" name="out_file1" metadata_source="input"/>
            <param name="cond" value="c3-c2"/>
            <param name="input" value="1.bed"/>
            <param name="round" value="false"/>
            <output name="out_file1" file="column_maker_out1.interval"/>
            <param name="cond" value="c4*1"/>
            <param name="input" value="1.interval"/>
            <param name="round" value="false"/>
            <output name="out_file1" file="column_maker_out2.interval"/>
            <param name="cond" value="c4*1"/>
            <param name="input" value="1.header.tsv"/>
            <param name="round" value="false"/>
            <conditional name="header_lines_conditional">
                <param name="header_lines_select" value="yes" />
                <param name="header_new_column_name" value="value1_again" />
            <output name="out_file1" file="column_maker_out2.header.tsv"/>
            <param name="cond" value="c4*1"/>
            <param name="input" value="1.interval"/>
            <param name="round" value="true"/>
            <output name="out_file1" file="column_maker_out3.interval"/>
             <!-- test that single column input works -->
             <param name="cond" value="c1/10"/>
             <param name="input" value="" ftype="tabular"/>
             <param name="round" value="no"/>
             <output name="out_file1" file=""/>
            <param name="cond" value="float(.0000000000001)"/>
            <param name="input" value="1.bed"/>
            <param name="round" value="false"/>
            <output name="out_file1">
                    <has_text text="CCDS10397" />
                    <has_text text="1e-13" />
            <param name="cond" value="float(.0000000000001)"/>
            <param name="input" value="1.bed"/>
            <param name="round" value="false"/>
            <param name="avoid_scientific_notation" value="true"/>
            <output name="out_file1">
                    <has_text text="CCDS10397" />
                    <has_text text=".0000000000001" />
                    <not_has_text text="1e-13" />

 .. class:: infomark

**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert*


**What it does**

This tool computes an expression for every row of a dataset and appends the result as a new column (field).

- Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file

- **c3-c2** will add a length column to the dataset if **c2** and **c3** are the start and end positions



If this is your input::

   chr1  151077881  151077918  2  200  -
   chr1  151081985  151082078  3  500  +

computing "c4*c5" will produce::

   chr1  151077881  151077918  2  200  -   400.0
   chr1  151081985  151082078  3  500  +  1500.0

if, at the same time, "Round result?" is set to **YES**, results will look like this::

   chr1  151077881  151077918  2  200  -   400
   chr1  151081985  151082078  3  500  +  1500

You can also use this tool to evaluate expressions. For example, computing "c3>=c2" for Input will result in the following::

   chr1  151077881  151077918  2  200  -  True
   chr1  151081985  151082078  3  500  +  True

or computing "type(c2)==type('')" for Input will return::

   chr1  151077881  151077918  2  200  -  False
   chr1  151081985  151082078  3  500  +  False

The following built-in functions are available::

  abs | all | any | bin | bool | chr | ceil | cmp | complex

  divmod | exp | float | log | log10 | floor | hex | int | len | long

  max | min | oct | ord | pow | range | reversed

  round | sorted | sqrt | str | sum | type | unichr | unicode

    <citations />