Mercurial > repos > devteam > column_maker

--- a/column_maker.py	Wed Jul 15 14:38:05 2020 +0000
+++ b/column_maker.py	Wed Dec 30 00:49:52 2020 +0000
@@ -5,31 +5,48 @@
 original file. The tool will skip over invalid lines within the file,
 informing the user about the number of lines skipped.
 """
-from __future__ import print_function

+import argparse
+import json
 import re
-import sys
-
-assert sys.version_info[:2] >= (2, 4)

-inp_file = sys.argv[1]
-out_file = sys.argv[2]
-expr = sys.argv[3]
-round_result = sys.argv[4]
+parser = argparse.ArgumentParser()
+parser.add_argument('input', type=argparse.FileType('r'), help="input file")
+parser.add_argument('output', type=argparse.FileType('wt'), help="output file")
+parser.add_argument('cond', nargs='?', type=str, help="expression")
+parser.add_argument('round', nargs='?', type=str, choices=['yes', 'no'],
+                    help="round result")
+parser.add_argument('columns', nargs='?', type=int, help="number of columns")
+parser.add_argument('column_types', nargs='?', type=str, help="comma separated list of column types")
+parser.add_argument('avoid_scientific_notation', nargs='?', type=str, choices=['yes', 'no'],
+                    help="avoid scientific notation")
+parser.add_argument('--load_json', default=None, type=argparse.FileType('r'),
+                    help="overwrite parsed arguments from json file")
+args = parser.parse_args()
+
+argparse_dict = vars(args)
+if args.load_json:
+    json_dict = json.load(args.load_json)
+    argparse_dict.update(json_dict)
+
+fh = argparse_dict['input']
+out = argparse_dict['output']
+expr = argparse_dict['cond']
+round_result = argparse_dict['round']
 try:
-    in_columns = int(sys.argv[5])
+    in_columns = int(argparse_dict['columns'])
 except Exception:
     exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it.  This tool can only be used with tab-delimited data.")
 if in_columns < 2:
     # To be considered tabular, data must fulfill requirements of the sniff.is_column_based() method.
     exit("Missing or invalid 'columns' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it.  This tool can only be used with tab-delimited data.")
 try:
-    in_column_types = sys.argv[6].split(',')
+    in_column_types = argparse_dict['column_types'].split(',')
 except Exception:
     exit("Missing or invalid 'column_types' metadata value, click the pencil icon in the history item and select the Auto-detect option to correct it.  This tool can only be used with tab-delimited data.")
 if len(in_column_types) != in_columns:
     exit("The 'columns' metadata setting does not conform to the 'column_types' metadata setting, click the pencil icon in the history item and select the Auto-detect option to correct it.  This tool can only be used with tab-delimited data.")
-avoid_scientific_notation = sys.argv[7]
+avoid_scientific_notation = argparse_dict['avoid_scientific_notation']

 # Unescape if input has been escaped
 mapped_str = {
@@ -74,7 +91,6 @@
 invalid_line = None
 lines_kept = 0
 total_lines = 0
-out = open(out_file, 'wt')

 # Read input file, skipping invalid lines, and perform computation that will result in a new column
 code = '''
@@ -89,7 +105,6 @@
 )
 from numpy import format_float_positional

-fh = open(inp_file)
 for i, line in enumerate(fh):
     total_lines += 1
     line = line.rstrip('\\r\\n')
--- a/column_maker.xml	Wed Jul 15 14:38:05 2020 +0000
+++ b/column_maker.xml	Wed Dec 30 00:49:52 2020 +0000
@@ -1,164 +1,170 @@
-<tool id="Add_a_column1" name="Compute" version="1.3.1">
-    <description>an expression on every row</description>
-    <requirements>
-        <requirement type="package" version="2.7.13">python</requirement>
-        <requirement type="package" version="4.4">sed</requirement>
-        <requirement type="package" version="1.14">numpy</requirement>
-    </requirements>
-    <command detect_errors="aggressive"><![CDATA[
-        #if $header_lines_conditional.header_lines_select == "yes":
-            (sed -n '1,1p' '$input' | sed  "s|$|%${header_lines_conditional.header_new_column_name}|" | tr "%" "\t") > header &&
-            sed '1,1d' '$input' > data &&
-        #else:
-            touch header &&
-            ln -s '$input' data &&
-        #end if
-
-        python '$__tool_directory__/column_maker.py'
-            data column_maker_output
-            "$cond"
-            $round
-            ${input.metadata.columns}
-            "${input.metadata.column_types}"
-            $avoid_scientific_notation &&
-        cat header column_maker_output > '$out_file1'
-    ]]></command>
-    <inputs>
-        <param name="cond" type="text" value="c3-c2" label="Add expression"/>
-        <param format="tabular" name="input" type="data" label="as a new column to" help="Dataset missing? See TIP below"/>
-        <param name="round" type="select" label="Round result?">
-            <option value="no">NO</option>
-            <option value="yes">YES</option>
-        </param>
-        <conditional name="header_lines_conditional">
-            <param name="header_lines_select" type="select" label="Skip a header line" help="# characters are already considered as comments and kept" >
-                <option value="no" >no</option>
-                <option value="yes" >yes</option>
-            </param>
-            <when value="no">
-            </when>
-            <when value="yes">
-                <param name="header_new_column_name" type="text" value="New Column" label="The new column name" />
-            </when>
-        </conditional>
-        <param name="avoid_scientific_notation" type="select" label="Avoid scientific notation" help="If yes, use fully expanded decimal representation when writing new columns (use only if expression produces decimal numbers).">
-            <option value="no">no</option>
-            <option value="yes">yes</option>
-        </param>
-    </inputs>
-    <outputs>
-        <data format_source="input" name="out_file1" metadata_source="input"/>
-    </outputs>
-    <tests>
-        <test>
-            <param name="cond" value="c3-c2"/>
-            <param name="input" value="1.bed"/>
-            <param name="round" value="no"/>
-            <output name="out_file1" file="column_maker_out1.interval"/>
-        </test>
-        <test>
-            <param name="cond" value="c4*1"/>
-            <param name="input" value="1.interval"/>
-            <param name="round" value="no"/>
-            <output name="out_file1" file="column_maker_out2.interval"/>
-        </test>
-        <test>
-            <param name="cond" value="c4*1"/>
-            <param name="input" value="1.header.tsv"/>
-            <param name="round" value="no"/>
-            <conditional name="header_lines_conditional">
-                <param name="header_lines_select" value="yes" />
-                <param name="header_new_column_name" value="value1_again" />
-            </conditional>
-            <output name="out_file1" file="column_maker_out2.header.tsv"/>
-        </test>
-        <test>
-            <param name="cond" value="c4*1"/>
-            <param name="input" value="1.interval"/>
-            <param name="round" value="yes"/>
-            <output name="out_file1" file="column_maker_out3.interval"/>
-        </test>
-        <test>
-            <param name="cond" value="float(.0000000000001)"/>
-            <param name="input" value="1.bed"/>
-            <param name="round" value="no"/>
-            <output name="out_file1">
-                <assert_contents>
-                    <has_text text="CCDS10397" />
-                    <has_text text="1e-13" />
-                </assert_contents>
-            </output>
-        </test>
-        <test>
-            <param name="cond" value="float(.0000000000001)"/>
-            <param name="input" value="1.bed"/>
-            <param name="round" value="no"/>
-            <param name="avoid_scientific_notation" value="yes"/>
-            <output name="out_file1">
-                <assert_contents>
-                    <has_text text="CCDS10397" />
-                    <has_text text=".0000000000001" />
-                    <not_has_text text="1e-13" />
-                </assert_contents>
-            </output>
-        </test>
-    </tests>
-    <help>
-
- .. class:: infomark
-
-**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
-
------
-
-**What it does**
-
-This tool computes an expression for every row of a dataset and appends the result as a new column (field).
-
-- Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file
-
-- **c3-c2** will add a length column to the dataset if **c2** and **c3** are start and end position
-
------
-
-**Example**
-
-If this is your input::
-
-   chr1  151077881  151077918  2  200  -
-   chr1  151081985  151082078  3  500  +
-
-computing "c4*c5" will produce::
-
-   chr1  151077881  151077918  2  200  -   400.0
-   chr1  151081985  151082078  3  500  +  1500.0
-
-if, at the same time, "Round result?" is set to **YES** results will look like this::
-
-   chr1  151077881  151077918  2  200  -   400
-   chr1  151081985  151082078  3  500  +  1500
-
-You can also use this tool to evaluate expressions. For example, computing "c3>=c2" for Input will result in the following::
-
-   chr1  151077881  151077918  2  200  -  True
-   chr1  151081985  151082078  3  500  +  True
-
-or computing "type(c2)==type('') for Input will return::
-
-   chr1  151077881  151077918  2  200  -  False
-   chr1  151081985  151082078  3  500  +  False
-
-
-The following built-in functions are available::
-
-  abs | all | any | bin | bool | chr | ceil | cmp | complex
-
-  divmod | exp | float | log | log10 | floor | hex | int | len | long
-
-  max | min | oct | ord | pow | range | reversed
-
-  round | sorted | sqrt | str | sum | type | unichr | unicode |
-
-    </help>
-    <citations />
-</tool>
+<tool id="Add_a_column1" name="Compute" version="1.4">
+    <description>an expression on every row</description>
+    <requirements>
+        <requirement type="package" version="3.8">python</requirement>
+        <requirement type="package" version="4.4">sed</requirement>
+        <requirement type="package" version="1.19.1">numpy</requirement>
+    </requirements>
+    <command detect_errors="aggressive"><![CDATA[
+        #if $header_lines_conditional.header_lines_select == "yes":
+            (sed -n '1,1p' '$input' | sed  "s|$|%${header_lines_conditional.header_new_column_name}|" | tr "%" "\t") > header &&
+            sed '1,1d' '$input' > data &&
+        #else:
+            touch header &&
+            ln -s '$input' data &&
+        #end if
+
+        ## inject columns and column_types metadata into inputs json
+        #import json
+        #set inputs_dict = json.load(open($inputs))
+        #set inputs_dict['columns'] = $input.metadata.columns
+        #set inputs_dict['column_types'] = $input.metadata.column_types
+        #set x = json.dump($inputs_dict, open($inputs, 'w'))
+
+        python '$__tool_directory__/column_maker.py'
+             data column_maker_output
+             --load_json '$inputs'
+        && cat header column_maker_output > '$out_file1'
+    ]]></command>
+    <configfiles>
+      <inputs name="inputs"/>
+    </configfiles>
+    <inputs>
+        <param name="cond" type="text" value="c3-c2" label="Add expression"/>
+        <param format="tabular" name="input" type="data" label="as a new column to" help="Dataset missing? See TIP below"/>
+        <param name="round" type="select" label="Round result?">
+            <option value="no">NO</option>
+            <option value="yes">YES</option>
+        </param>
+        <conditional name="header_lines_conditional">
+            <param name="header_lines_select" type="select" label="Skip a header line" help="# characters are already considered as comments and kept" >
+                <option value="no" >no</option>
+                <option value="yes" >yes</option>
+            </param>
+            <when value="no">
+            </when>
+            <when value="yes">
+                <param name="header_new_column_name" type="text" value="New Column" label="The new column name" />
+            </when>
+        </conditional>
+        <param name="avoid_scientific_notation" type="select" label="Avoid scientific notation" help="If yes, use fully expanded decimal representation when writing new columns (use only if expression produces decimal numbers).">
+            <option value="no">no</option>
+            <option value="yes">yes</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data format_source="input" name="out_file1" metadata_source="input"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="cond" value="c3-c2"/>
+            <param name="input" value="1.bed"/>
+            <param name="round" value="no"/>
+            <output name="out_file1" file="column_maker_out1.interval"/>
+        </test>
+        <test>
+            <param name="cond" value="c4*1"/>
+            <param name="input" value="1.interval"/>
+            <param name="round" value="no"/>
+            <output name="out_file1" file="column_maker_out2.interval"/>
+        </test>
+        <test>
+            <param name="cond" value="c4*1"/>
+            <param name="input" value="1.header.tsv"/>
+            <param name="round" value="no"/>
+            <conditional name="header_lines_conditional">
+                <param name="header_lines_select" value="yes" />
+                <param name="header_new_column_name" value="value1_again" />
+            </conditional>
+            <output name="out_file1" file="column_maker_out2.header.tsv"/>
+        </test>
+        <test>
+            <param name="cond" value="c4*1"/>
+            <param name="input" value="1.interval"/>
+            <param name="round" value="yes"/>
+            <output name="out_file1" file="column_maker_out3.interval"/>
+        </test>
+        <test>
+            <param name="cond" value="float(.0000000000001)"/>
+            <param name="input" value="1.bed"/>
+            <param name="round" value="no"/>
+            <output name="out_file1">
+                <assert_contents>
+                    <has_text text="CCDS10397" />
+                    <has_text text="1e-13" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="cond" value="float(.0000000000001)"/>
+            <param name="input" value="1.bed"/>
+            <param name="round" value="no"/>
+            <param name="avoid_scientific_notation" value="yes"/>
+            <output name="out_file1">
+                <assert_contents>
+                    <has_text text="CCDS10397" />
+                    <has_text text=".0000000000001" />
+                    <not_has_text text="1e-13" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+
+ .. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
+
+-----
+
+**What it does**
+
+This tool computes an expression for every row of a dataset and appends the result as a new column (field).
+
+- Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file
+
+- **c3-c2** will add a length column to the dataset if **c2** and **c3** are start and end position
+
+-----
+
+**Example**
+
+If this is your input::
+
+   chr1  151077881  151077918  2  200  -
+   chr1  151081985  151082078  3  500  +
+
+computing "c4*c5" will produce::
+
+   chr1  151077881  151077918  2  200  -   400.0
+   chr1  151081985  151082078  3  500  +  1500.0
+
+if, at the same time, "Round result?" is set to **YES** results will look like this::
+
+   chr1  151077881  151077918  2  200  -   400
+   chr1  151081985  151082078  3  500  +  1500
+
+You can also use this tool to evaluate expressions. For example, computing "c3>=c2" for Input will result in the following::
+
+   chr1  151077881  151077918  2  200  -  True
+   chr1  151081985  151082078  3  500  +  True
+
+or computing "type(c2)==type('')" for Input will return::
+
+   chr1  151077881  151077918  2  200  -  False
+   chr1  151081985  151082078  3  500  +  False
+
+
+The following built-in functions are available::
+
+  abs | all | any | bin | bool | chr | ceil | cmp | complex
+
+  divmod | exp | float | log | log10 | floor | hex | int | len | long
+
+  max | min | oct | ord | pow | range | reversed
+
+  round | sorted | sqrt | str | sum | type | unichr | unicode
+
+    </help>
+    <citations />
+</tool>