view column_maker.xml @ 8:227e82286a0e draft

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/column_maker commit c623e4b9f6dd6ff4b88d9386f3b0a0cb497e0a0e"
author devteam
date Wed, 24 Feb 2021 05:19:12 +0000
parents e7c273e8d4d6
children 33b81f9ea109
line wrap: on
line source

<tool id="Add_a_column1" name="Compute" version="1.6">
    <description>an expression on every row</description>
    <!-- Packages required by the tool's job; both versions are pinned exactly. -->
    <requirements>
        <requirement type="package" version="3.8">python</requirement>
        <requirement type="package" version="1.19.1">numpy</requirement>
    </requirements>
    <!--
        Command template (Cheetah). It symlinks the input dataset as "data",
        augments the "inputs" configfile (the JSON file declared in
        <configfiles> below) with the input's column metadata and, when a
        header line was declared, the new column name, then runs
        column_maker.py with that JSON file.
    -->
    <command detect_errors="aggressive"><![CDATA[
        ln -s '$input' data &&

        ## Inject columns and column_types metadata into the inputs JSON.
        ## The file handles are closed explicitly so that the rewritten JSON
        ## is flushed to disk deterministically instead of relying on
        ## garbage collection of the anonymous file objects.
        #import json
        #set $json_in = open($inputs)
        #set inputs_dict = json.load($json_in)
        #silent $json_in.close()
        #set inputs_dict['columns'] = $input.metadata.columns
        #set inputs_dict['column_types'] = $input.metadata.column_types
        ## Flatten the conditional: expose the new column name as a top-level
        ## key, but only when the user declared a header line.
        #if $header_lines_conditional.header_lines_select == "yes":
            #set inputs_dict['header_new_column_name'] = str($header_lines_conditional.header_new_column_name)
        #end if
        #set $json_out = open($inputs, 'w')
        #silent json.dump($inputs_dict, $json_out)
        #silent $json_out.close()

        python '$__tool_directory__/column_maker.py'
             data '$out_file1'
             --load_json '$inputs'
    ]]></command>
    <configfiles>
      <inputs name="inputs"/>
    </configfiles>
    <inputs>
        <!-- Expression to evaluate on each row. The sanitizer starts from the
             default set of valid characters and additionally allows the
             less-than and greater-than signs and both quote characters. -->
        <param name="cond" type="text" label="Add expression" value="c3-c2">
            <sanitizer>
                <valid initial="default">
                    <add value="&lt;"/>
                    <add value="&gt;"/>
                    <add value="&quot;"/>
                    <add value="&apos;"/>
                </valid>
            </sanitizer>
        </param>
        <!-- Tabular dataset the computed column is appended to. -->
        <param name="input" type="data" format="tabular" label="as a new column to" help="Dataset missing? See TIP below"/>
        <!-- Output formatting switches. -->
        <param name="round" type="boolean" label="Round result?" truevalue="yes" falsevalue="no"/>
        <param name="avoid_scientific_notation" type="boolean" label="Avoid scientific notation"
               truevalue="yes" falsevalue="no"
               help="If yes, use fully expanded decimal representation when writing new columns (use only if expression produces decimal numbers)."/>
        <!-- Header handling: a name for the new column is only requested
             when the input is declared to have a header line. -->
        <conditional name="header_lines_conditional">
            <param name="header_lines_select" type="select"
                   label="Input has a header line with column names?"
                   help="Select Yes to be able to specify a name for the new column and have it added to the header line. If you select No, the first line will be treated as a regular line: If it is empty or starts with a # character it will be skipped, otherwise the tool will attempt to compute the specified expression on it.">
                <option value="no">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="no"/>
            <when value="yes">
                <param name="header_new_column_name" type="text" label="The new column name" value="New Column"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Single output; its format and metadata are taken from the input dataset. -->
        <data name="out_file1" format_source="input" metadata_source="input"/>
    </outputs>
    <tests>
        <!-- c3-c2 on a BED file, unrounded -->
        <test>
            <param name="cond" value="c3-c2"/>
            <param name="input" value="1.bed"/>
            <param name="round" value="false"/>
            <output name="out_file1" file="column_maker_out1.interval"/>
        </test>
        <!-- c4*1 on an interval file, unrounded -->
        <test>
            <param name="cond" value="c4*1"/>
            <param name="input" value="1.interval"/>
            <param name="round" value="false"/>
            <output name="out_file1" file="column_maker_out2.interval"/>
        </test>
        <!-- input with a header line: the new column gets an explicit name -->
        <test>
            <param name="cond" value="c4*1"/>
            <param name="input" value="1.header.tsv"/>
            <param name="round" value="false"/>
            <conditional name="header_lines_conditional">
                <param name="header_lines_select" value="yes" />
                <param name="header_new_column_name" value="value1_again" />
            </conditional>
            <output name="out_file1" file="column_maker_out2.header.tsv"/>
        </test>
        <!-- c4*1 on an interval file with rounding enabled -->
        <test>
            <param name="cond" value="c4*1"/>
            <param name="input" value="1.interval"/>
            <param name="round" value="true"/>
            <output name="out_file1" file="column_maker_out3.interval"/>
        </test>
        <test>
            <!-- test that single column input works -->
            <param name="cond" value="c1/10"/>
            <param name="input" value="1.tab" ftype="tabular"/>
            <param name="round" value="false"/>
            <output name="out_file1" file="column_maker_out4.tab"/>
        </test>
        <!-- default number formatting: a tiny float is written in scientific notation -->
        <test>
            <param name="cond" value="float(.0000000000001)"/>
            <param name="input" value="1.bed"/>
            <param name="round" value="false"/>
            <output name="out_file1">
                <assert_contents>
                    <has_text text="CCDS10397" />
                    <has_text text="1e-13" />
                </assert_contents>
            </output>
        </test>
        <!-- avoid_scientific_notation: the same float is written fully expanded -->
        <test>
            <param name="cond" value="float(.0000000000001)"/>
            <param name="input" value="1.bed"/>
            <param name="round" value="false"/>
            <param name="avoid_scientific_notation" value="true"/>
            <output name="out_file1">
                <assert_contents>
                    <has_text text="CCDS10397" />
                    <has_text text=".0000000000001" />
                    <not_has_text text="1e-13" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[

.. class:: infomark

**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert*

-----

**What it does**

This tool computes an expression for every row of a dataset and appends the result as a new column (field).

- Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file

- **c3-c2** will add a length column to the dataset if **c2** and **c3** are the start and end positions

-----

**Example**

If this is your input::

   chr1  151077881  151077918  2  200  -
   chr1  151081985  151082078  3  500  +

computing "c4*c5" will produce::

   chr1  151077881  151077918  2  200  -   400.0
   chr1  151081985  151082078  3  500  +  1500.0

if, at the same time, "Round result?" is set to **YES** results will look like this::

   chr1  151077881  151077918  2  200  -   400
   chr1  151081985  151082078  3  500  +  1500

You can also use this tool to evaluate expressions. For example, computing "c3>=c2" for Input will result in the following::

   chr1  151077881  151077918  2  200  -  True
   chr1  151081985  151082078  3  500  +  True

or computing "type(c2)==type('')" for Input will return::

   chr1  151077881  151077918  2  200  -  False
   chr1  151081985  151082078  3  500  +  False


The following built-in functions are available::

  abs | all | any | bin | bool | chr | ceil | cmp | complex

  divmod | exp | float | log | log10 | floor | hex | int | len | long

  max | min | oct | ord | pow | range | reversed

  round | sorted | sqrt | str | sum | type | unichr | unicode

    ]]></help>
    <citations />
</tool>