Mercurial > repos > bgruening > text_processing
changeset 10:c78b1767db2b draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 10052765d6b712cf7d38356af4251fcc38a339b6-dirty
author | bgruening |
---|---|
date | Fri, 26 Feb 2016 12:22:54 -0500 |
parents | d9819ccb9ca7 |
children | e6d48dc4e6ba |
files | cat.xml cut.xml easyjoin.xml find_and_replace.xml grep.xml head.xml multijoin.xml recurring_lines.xml replace_text_in_column.xml replace_text_in_line.xml sort.xml sorted_uniq.xml tac.xml tail.xml test-data/egrep1.txt test-data/egrep_results1.txt test-data/egrep_results2.txt tool_dependencies.xml unfold_column.py unfold_column.xml |
diffstat | 19 files changed, 68 insertions(+), 30 deletions(-) [+] |
line wrap: on
line diff
--- a/cat.xml Tue Jun 30 17:47:36 2015 -0400 +++ b/cat.xml Fri Feb 26 12:22:54 2016 -0500 @@ -20,7 +20,7 @@ <inputs> <param name="inputs" multiple="true" type="data" format="txt" label="Datasets to concatenate"/> <repeat name="queries" title="Dataset"> - <param name="inputs2" type="data" multiple="True" label="Select" /> + <param name="inputs2" type="data" format="txt" multiple="True" label="Select" /> </repeat> </inputs> <outputs>
--- a/cut.xml Tue Jun 30 17:47:36 2015 -0400 +++ b/cut.xml Fri Feb 26 12:22:54 2016 -0500 @@ -1,5 +1,5 @@ <tool id="tp_cut_tool" name="Cut" version="@BASE_VERSION@.0"> - <description>columns from a table</description> + <description>columns from a table (cut)</description> <macros> <import>macros.xml</import> </macros> @@ -43,7 +43,7 @@ <param name="list" type="data_column" data_ref="input" multiple="true" label="List of Fields" help="(-f)" /> </when> <when value="-c"> - <param name="list" type="text" size="20" value="" label="List of characters" help="These will be kept/discarded (depending on 'operation'). <BR /> Examples: 1,3,4 or 2-5"> + <param name="list" type="text" value="" label="List of characters" help="These will be kept/discarded (depending on 'operation'). <BR /> Examples: 1,3,4 or 2-5"> <sanitizer> <valid initial="string.printable"> <remove value="'"/> @@ -52,7 +52,7 @@ </param> </when> <when value="-b"> - <param name="list" type="text" size="20" value="" label="List of Bytes" help="These will be kept/discarded (depending on 'operation'). <BR /> Examples: 1,3,4 or 2-5"> + <param name="list" type="text" value="" label="List of Bytes" help="These will be kept/discarded (depending on 'operation'). <BR /> Examples: 1,3,4 or 2-5"> <sanitizer> <valid initial="string.printable"> <remove value="'"/>
--- a/easyjoin.xml Tue Jun 30 17:47:36 2015 -0400 +++ b/easyjoin.xml Fri Feb 26 12:22:54 2016 -0500 @@ -44,7 +44,7 @@ label="First line is a header line" help="Use if first line contains column headers. It will not be sorted." /> <param name="ignore_case" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Ignore case" help="Sort and Join key column values regardless of upper/lower case letters." /> - <param name="empty_string_filler" type="text" size="20" value="0" label="Value to put in unpaired (empty) fields"> + <param name="empty_string_filler" type="text" value="0" label="Value to put in unpaired (empty) fields"> <sanitizer> <valid initial="string.printable"> <remove value="'"/>
--- a/find_and_replace.xml Tue Jun 30 17:47:36 2015 -0400 +++ b/find_and_replace.xml Fri Feb 26 12:22:54 2016 -0500 @@ -21,14 +21,14 @@ </command> <inputs> <param name="infile" format="txt" type="data" label="File to process" /> - <param name="find_pattern" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without backslashes // ) " > + <param name="find_pattern" type="text" label="Find pattern" help="Use simple text, or a valid regular expression (without backslashes // ) " > <sanitizer> <valid initial="string.printable"> <remove value="'"/> </valid> </sanitizer> </param> - <param name="replace_pattern" type="text" size="20" label="Replace with" + <param name="replace_pattern" type="text" label="Replace with" help="Use simple text, or $& (dollar-ampersand) and $1 $2 $3 to refer to matched text. See examples below." > <sanitizer> <valid initial="string.printable">
--- a/grep.xml Tue Jun 30 17:47:36 2015 -0400 +++ b/grep.xml Fri Feb 26 12:22:54 2016 -0500 @@ -13,7 +13,7 @@ GREP_COLOR='1;34' grep --color=always - -P + $regex_type -A $lines_after -B $lines_before $invert @@ -22,7 +22,7 @@ '${infile}' | $__tool_directory__/ansi2html.sh > "${output}" #else: grep - -P + $regex_type -A $lines_after -B $lines_before $invert @@ -41,8 +41,14 @@ <option value="">Match</option> <option value="-v">Don't Match</option> </param> - - <param name="url_paste" type="text" size="40" label="Regular Expression" help="See below for more details"> + + <param name="regex_type" type="select" label="Type of regex"> + <option value="-G">Basic</option> + <option value="-P" selected="true">Perl</option> + <option value="-E">Extended (egrep)</option> + </param> + + <param name="url_paste" type="text" label="Regular Expression" help="See below for more details"> <sanitizer> <valid initial="string.printable"> <remove value="'"/> @@ -76,6 +82,7 @@ <!-- grep a FASTA file for sequences with specific motif --> <param name="infile" value="grep1.txt" /> <param name="case_sensitive" value="case sensitive" /> + <param name="regex_type" value="-P" /> <param name="invert" value="" /> <param name="url_paste" value="AA.{2}GT" /> <param name="lines_before" value="1" /> @@ -88,6 +95,7 @@ show highlighted output --> <param name="infile" value="grep1.txt" /> <param name="case_sensitive" value="case sensitive" /> + <param name="regex_type" value="-P" /> <param name="invert" value="" /> <param name="url_paste" value="AA.{2}GT" /> <param name="lines_before" value="0" /> @@ -95,6 +103,28 @@ <param name="color" value="COLOR" /> <output name="output" file="grep_results2.html" /> </test> + <test><!-- tests egrep --> + <param name="infile" value="egrep1.txt" /> + <param name="case_sensitive" value="case sensitive" /> + <param name="regex_type" value="-E" /> + <param name="invert" value="" /> + <param name="url_paste" value="[^ ]+" /> + <param name="lines_before" value="0" /> + 
<param name="lines_after" value="0" /> + <param name="color" value="NOCOLOR" /> + <output name="output" file="egrep_results1.txt" /> + </test> + <test><!-- same regex as egrep test, but different outcome with basic regex --> + <param name="infile" value="egrep1.txt" /> + <param name="case_sensitive" value="case sensitive" /> + <param name="regex_type" value="-G" /> + <param name="invert" value="" /> + <param name="url_paste" value="[^ ]+" /> + <param name="lines_before" value="0" /> + <param name="lines_after" value="0" /> + <param name="color" value="NOCOLOR" /> + <output name="output" file="egrep_results2.txt" /> + </test> </tests> <help> <![CDATA[
--- a/head.xml Tue Jun 30 17:47:36 2015 -0400 +++ b/head.xml Fri Feb 26 12:22:54 2016 -0500 @@ -20,7 +20,7 @@ <option value="">Keep first lines</option> <option value="-">Remove last lines</option> </param> - <param name="count" type="integer" size="5" value="10" + <param name="count" type="integer" value="10" label="Number of lines" help="These will be kept/discarded depending on 'operation'. (--lines)" /> </inputs> <outputs>
--- a/multijoin.xml Tue Jun 30 17:47:36 2015 -0400 +++ b/multijoin.xml Fri Feb 26 12:22:54 2016 -0500 @@ -36,7 +36,7 @@ <param name="output_header" type="boolean" checked="false" truevalue="--out-header" falsevalue="" label="Add header line to the output file" help="" /> <param name="input_header" type="boolean" checked="false" truevalue="--in-header" falsevalue="" label="Input files contain a header line (as first line)" help="" /> <param name="ignore_dups" type="boolean" checked="false" truevalue="--ignore-dups" falsevalue="" label="Ignore duplicated keys" help="If not set, duplicated keys in the same file will cause an error." /> - <param name="filler" type="text" size="20" value="0" label="Value to put in unpaired (empty) fields"> + <param name="filler" type="text" value="0" label="Value to put in unpaired (empty) fields"> <sanitizer> <valid initial="string.printable"> <remove value="'"/>
--- a/recurring_lines.xml Tue Jun 30 17:47:36 2015 -0400 +++ b/recurring_lines.xml Fri Feb 26 12:22:54 2016 -0500 @@ -19,7 +19,7 @@ </command> <inputs> <repeat name="token_set" title=" selection" min="1"> - <param name="line" type="text" size="30" + <param name="line" type="text" label="Characters to insert" help="Specify the characters that will be inserted X times in every line"/> <conditional name="repeat_select"> <param name="repeat_select_opts" type="select" label="Specify the number of iterations by"> @@ -27,7 +27,7 @@ <option value="user" selected="True">User defined number</option> </param> <when value="user"> - <param name="times" size="10" type="integer" value="10" min="1" label="How many times?"/> + <param name="times" type="integer" value="10" min="1" label="How many times?"/> </when> <when value="file"> <param name="infile" type="data" format="txt" label="Template file"
--- a/replace_text_in_column.xml Tue Jun 30 17:47:36 2015 -0400 +++ b/replace_text_in_column.xml Fri Feb 26 12:22:54 2016 -0500 @@ -21,14 +21,14 @@ <param format="tabular" name="infile" type="data" label="File to process" /> <param name="column" label="in column" type="data_column" data_ref="infile" accept_default="true" /> - <param name="find_pattern" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without backslashes // ) " > + <param name="find_pattern" type="text" label="Find pattern" help="Use simple text, or a valid regular expression (without backslashes // ) " > <sanitizer> <valid initial="string.printable"> <remove value="'"/> </valid> </sanitizer> </param> - <param name="replace_pattern" type="text" size="20" label="Replace with" help="Use simple text, or & (ampersand) and \\1 \\2 \\3 to refer to matched text. See examples below." > + <param name="replace_pattern" type="text" label="Replace with" help="Use simple text, or & (ampersand) and \\1 \\2 \\3 to refer to matched text. See examples below." > <sanitizer> <valid initial="string.printable"> <remove value="'"/>
--- a/replace_text_in_line.xml Tue Jun 30 17:47:36 2015 -0400 +++ b/replace_text_in_line.xml Fri Feb 26 12:22:54 2016 -0500 @@ -19,14 +19,14 @@ </command> <inputs> <param format="txt" name="infile" type="data" label="File to process" /> - <param name="find_pattern" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without backslashes // ) " > + <param name="find_pattern" type="text" label="Find pattern" help="Use simple text, or a valid regular expression (without backslashes // ) " > <sanitizer> <valid initial="string.printable"> <remove value="'"/> </valid> </sanitizer> </param> - <param name="replace_pattern" type="text" size="20" label="Replace with:" help="Use simple text, or & (ampersand) and \\1 \\2 \\3 to refer to matched text. See examples below." > + <param name="replace_pattern" type="text" label="Replace with:" help="Use simple text, or & (ampersand) and \\1 \\2 \\3 to refer to matched text. See examples below." > <sanitizer> <valid initial="string.printable"> <remove value="'"/>
--- a/sort.xml Tue Jun 30 17:47:36 2015 -0400 +++ b/sort.xml Fri Feb 26 12:22:54 2016 -0500 @@ -26,7 +26,7 @@ </command> <inputs> <param format="tabular" name="infile" type="data" label="Sort Query" /> - <param name="header" type="integer" size="5" value="0" + <param name="header" type="integer" value="0" label="Number of header lines" help="These will be ignored during sort."> <validator type="in_range" message="Negative values are not allowed." min="0"/> </param>
--- a/sorted_uniq.xml Tue Jun 30 17:47:36 2015 -0400 +++ b/sorted_uniq.xml Fri Feb 26 12:22:54 2016 -0500 @@ -64,7 +64,7 @@ <param name="ignorecase" type="boolean" truevalue="-i" falsevalue="" label="Ignore differences in case when comparing" help="(-i)"/> - <param name="skipfields" type="integer" size="2" value="0" + <param name="skipfields" type="integer" value="0" label="Avoid comparing the first N fields" help="Use zero to start from the first field. (-f)" /> </inputs> <outputs>
--- a/tac.xml Tue Jun 30 17:47:36 2015 -0400 +++ b/tac.xml Fri Feb 26 12:22:54 2016 -0500 @@ -32,7 +32,7 @@ label="Attach the separator before instead of after" help="(--before)"/> <param name="regex" type="boolean" truevalue="-r" falsevalue="" checked="True" label="Interpret the separator as a regular expression" help="(--regex)"/> - <param name="separator_string" size="5" type="text" value="" + <param name="separator_string" type="text" value="" label="Separator to use" help="(--separator)" /> </when> </conditional>
--- a/tail.xml Tue Jun 30 17:47:36 2015 -0400 +++ b/tail.xml Fri Feb 26 12:22:54 2016 -0500 @@ -19,7 +19,7 @@ <option value="">Keep last lines</option> <option value="+">Keep everything from this line on</option> </param> - <param name="num_lines" type="integer" size="5" value="10" + <param name="num_lines" type="integer" value="10" label="Number of lines" help="These will be kept (depending on 'operation'). (--lines)" /> </inputs> <outputs>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/egrep1.txt Fri Feb 26 12:22:54 2016 -0500 @@ -0,0 +1,1 @@ +actgagctacg agctacgatcg atcgactacga
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/egrep_results1.txt Fri Feb 26 12:22:54 2016 -0500 @@ -0,0 +1,1 @@ +actgagctacg agctacgatcg atcgactacga
--- a/tool_dependencies.xml Tue Jun 30 17:47:36 2015 -0400 +++ b/tool_dependencies.xml Fri Feb 26 12:22:54 2016 -0500 @@ -1,25 +1,25 @@ <?xml version="1.0"?> <tool_dependency> <package name="gnu_coreutils" version="8.22"> - <repository changeset_revision="b638666e399d" name="package_gnu_coreutils_8_22" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="8b60cf3e0c07" name="package_gnu_coreutils_8_22" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> <package name="gnu_awk" version="4.1.0"> - <repository changeset_revision="440a5170003f" name="package_gnu_awk_4_1_0" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="0f0bdef2f686" name="package_gnu_awk_4_1_0" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> <package name="gnu_grep" version="2.14"> - <repository changeset_revision="30b2e70d982e" name="package_gnu_grep_2_14" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="3f6615b56ebb" name="package_gnu_grep_2_14" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> <package name="gnu_sed" version="4.2.2-sandbox"> <repository changeset_revision="ae00df77b031" name="package_gnu_sed_4_2_2_sandbox" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> <package name="perl" version="5.18.1"> - <repository changeset_revision="f6efd799dc70" name="package_perl_5_18" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="318ccddfe301" name="package_perl_5_18" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> <package name="text_processing_perl_packages" version="1.0"> <install version="1.0"> <actions> <action type="setup_perl_environment"> - <repository changeset_revision="f6efd799dc70" name="package_perl_5_18" owner="iuc" 
toolshed="https://testtoolshed.g2.bx.psu.edu"> + <repository changeset_revision="318ccddfe301" name="package_perl_5_18" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu"> <package name="perl" version="5.18.1" /> </repository> <!-- allow downloading and installing a Perl package from cpan.org-->
--- a/unfold_column.py Tue Jun 30 17:47:36 2015 -0400 +++ b/unfold_column.py Fri Feb 26 12:22:54 2016 -0500 @@ -4,11 +4,16 @@ out = open(sys.argv[4], 'w+') +sep = sys.argv[3] +# un-sanitize Galaxy inputs +if sep == 'X': + sep = ';' + with open(sys.argv[1]) as handle: for line in handle: cols = line.split('\t') unfolding_column = int(sys.argv[2]) - 1 column_content = cols[ unfolding_column ] - for elem in column_content.split( sys.argv[3] ): + for elem in column_content.split( sep ): out.write( '\t'.join( cols[:unfolding_column] + [elem] + cols[unfolding_column+1:]) ) out.close()
--- a/unfold_column.xml Tue Jun 30 17:47:36 2015 -0400 +++ b/unfold_column.xml Fri Feb 26 12:22:54 2016 -0500 @@ -9,7 +9,7 @@ unfold_column.py '${infile}' $column - "$delimiter" + "${delimiter}" '${outfile}' ]]> </command> @@ -23,6 +23,7 @@ <option value="-">Dash</option> <option value="_">Underscore</option> <option value="|">Pipe</option> + <option value=";">Semicolon</option> </param> </inputs> <outputs>