Mercurial > repos > bgruening > text_processing
changeset 4:56e80527c482 draft
Uploaded
line wrap: on
line diff
--- a/awk.xml Sun Oct 06 08:22:36 2013 -0400 +++ b/awk.xml Wed Jan 07 11:10:52 2015 -0500 @@ -1,10 +1,23 @@ -<tool id="tp_awk_tool" name="Text reformatting" version="0.1.1"> +<tool id="tp_awk_tool" name="Text reformatting" version="@BASE_VERSION@.0"> <description>with awk</description> - <requirements> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"> <requirement type="package" version="4.1.0">gnu_awk</requirement> - </requirements> + </expand> + <version_command>awk --version | head -n 1</version_command> <command> - awk --sandbox -v FS=\$'\t' -v OFS=\$'\t' --re-interval -f '$awk_script' '$input' > '$output' +<![CDATA[ + awk + --sandbox + -v FS=\$'\t' + -v OFS=\$'\t' + --re-interval + -f '$awk_script' + "$input" + > "$output" +]]> </command> <inputs> <param format="txt" name="input" type="data" label="File to process" /> @@ -15,26 +28,25 @@ </valid> </sanitizer> </param> - </inputs> - <tests> + </inputs> + <configfiles> + <configfile name="awk_script"> + $url_paste + </configfile> + </configfiles> + <outputs> + <data format="input" name="output" metadata_source="input"/> + </outputs> + <tests> <test> <param name="input" value="unix_awk_input1.txt" /> + <param name="awk_script" value="$2>0.5 { print $2*9, $1 }" /> <output name="output" file="unix_awk_output1.txt" /> - <param name="FS" value="tab" /> - <param name="OFS" value="tab" /> - <param name="file_data" value="$2>0.5 { print $2*9, $1 }" /> </test> - </tests> - <outputs> - <data format="input" name="output" metadata_source="input"/> - </outputs> - <configfiles> - <configfile name="awk_script"> - $url_paste - </configfile> - </configfiles> - <help> + </tests> + <help> +<![CDATA[ **What it does** This tool runs the unix **awk** command on the selected data file. @@ -111,6 +123,7 @@ - **$** matches the end of a line or string. - **\|** Separates alternate possibilities. - +@REFERENCES@ +]]> </help> </tool>
--- a/cut.xml Sun Oct 06 08:22:36 2013 -0400 +++ b/cut.xml Wed Jan 07 11:10:52 2015 -0500 @@ -1,162 +1,179 @@ -<tool id="tp_cut_tool" name="Cut" version="0.1.1"> +<tool id="tp_cut_tool" name="Cut" version="@BASE_VERSION@.0"> <description>columns from a table</description> - <requirements> - <requirement type="package" version="8.21">gnu_coreutils</requirement> - </requirements> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <version_command>cut --version | head -n 1</version_command> <command> - cut - #if str($delimiter) != '': - -d"${delimiter}" - #end if - ${complement} ${cut_element} '${list}' '${input}' > '${output}' +<![CDATA[ + cut + #if str($delimiter) != '': + -d"${delimiter}" + #end if + ${complement} + ${cut_type_options.cut_element} + '${cut_type_options.list}' + '${input}' + > '${output}' +]]> </command> - <inputs> - <param format="txt" name="input" type="data" label="file to cut" /> + <param name="input" format="txt" type="data" label="File to cut" /> <param name="complement" type="select" label="Operation"> <option value="">Keep</option> <option value="--complement">Discard</option> </param> - <param name="delimiter" type="select" label="Delimited by"> - <option value="">Tab</option> - <option value=" ">Whitespace</option> - <option value=".">Dot</option> - <option value=",">Comma</option> - <option value="-">Dash</option> - <option value="_">Underscore</option> - <option value="|">Pipe</option> + <option value="">Tab</option> + <option value=" ">Whitespace</option> + <option value=".">Dot</option> + <option value=",">Comma</option> + <option value="-">Dash</option> + <option value="_">Underscore</option> + <option value="|">Pipe</option> </param> - - <param name="cut_element" type="select" label="Cut by"> - <option value="-f">fields</option> - <option value="-c">characters</option> - <option value="-b">bytes</option> - </param> - - <param name="list" type="text" size="20" value="" label="List of 
Fields/Characters/Bytes" help="These will be kept/discarded (depending on 'operation'). <BR /> Examples: 1,3,4 or 2-5"> - <sanitizer> - <valid initial="string.printable"> - <remove value="'"/> - </valid> - </sanitizer> - </param> + <conditional name="cut_type_options"> + <param name="cut_element" type="select" label="Cut by"> + <option value="-f">fields</option> + <option value="-c">characters</option> + <option value="-b">bytes</option> + </param> + <when value="-f"> + <param name="list" type="data_column" data_ref="input" multiple="true" label="List of Fields" help="(-f)" /> + </when> + <when value="-c"> + <param name="list" type="text" size="20" value="" label="List of Fields" help="These will be kept/discarded (depending on 'operation'). <BR /> Examples: 1,3,4 or 2-5"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </when> + <when value="-b"> + <param name="list" type="text" size="20" value="" label="List of Bytes" help="These will be kept/discarded (depending on 'operation'). 
<BR /> Examples: 1,3,4 or 2-5"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </when> + </conditional> </inputs> <outputs> <!--<data format="tabular" name="output" />--> <!-- WIP, not sure that will work with the complement option --> <data format="tabular" name="output"> - <actions> - <conditional name="cut_element"> - <when value="-f"> - <conditional name="delimiter"> - <when value="T"> - <conditional name="input"> - <when datatype_isinstance="interval"> - <action type="format" default="tabular"> - <option type="from_param" name="list" column="0" offset="0"> <!-- chromCol is 1--> - - <filter type="insert_column" column="0" value="interval"/> - <filter type="insert_column" ref="list" /> <!-- startCol --> - <filter type="insert_column" ref="list" /> <!-- endCol --> - - <filter type="multiple_splitter" column="1" separator=","/> - <filter type="column_strip" column="1"/> <!-- get rid of all external whitespace --> - <filter type="string_function" column="1" name="lower" /> - <filter type="param_value" column="1" value="^c\d{1,}$" compare="re_search" keep="True"/> - <filter type="column_strip" column="1" strip="c"/> <!-- get rid of c's --> - <filter type="boolean" column="1" cast="int" /> + <actions> + <conditional name="cut_type_options.cut_element"> + <!-- fields --> + <when value="-f"> + <conditional name="delimiter"> + <when value="T"> + <conditional name="input"> + <when datatype_isinstance="interval"> + <action type="format" default="tabular"> + <option type="from_param" name="list" column="0" offset="0"> <!-- chromCol is 1--> + <filter type="insert_column" column="0" value="interval"/> + <filter type="insert_column" ref="list" /> <!-- startCol --> + <filter type="insert_column" ref="list" /> <!-- endCol --> - <filter type="multiple_splitter" column="2" separator=","/> - <filter type="column_strip" column="2"/> <!-- get rid of all external whitespace --> - <filter type="string_function" 
column="2" name="lower" /> - <filter type="param_value" column="2" value="^c\d{1,}$" compare="re_search" keep="True"/> - <filter type="column_strip" column="2" strip="c"/> <!-- get rid of c's --> - <filter type="boolean" column="2" cast="int" /> + <filter type="multiple_splitter" column="1" separator=","/> + <filter type="column_strip" column="1"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="1" name="lower" /> + <filter type="param_value" column="1" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="1" strip="c"/> <!-- get rid of c's --> + <filter type="boolean" column="1" cast="int" /> - <filter type="multiple_splitter" column="3" separator=","/> - <filter type="column_strip" column="3"/> <!-- get rid of all external whitespace --> - <filter type="string_function" column="3" name="lower" /> - <filter type="param_value" column="3" value="^c\d{1,}$" compare="re_search" keep="True"/> - <filter type="column_strip" column="3" strip="c"/> <!-- get rid of c's --> - <filter type="boolean" column="3" cast="int" /> + <filter type="multiple_splitter" column="2" separator=","/> + <filter type="column_strip" column="2"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="2" name="lower" /> + <filter type="param_value" column="2" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="2" strip="c"/> <!-- get rid of c's --> + <filter type="boolean" column="2" cast="int" /> - <filter type="metadata_value" ref="input" name="chromCol" column="1" /> - <filter type="metadata_value" ref="input" name="startCol" column="2" /> - <filter type="metadata_value" ref="input" name="endCol" column="3" /> + <filter type="multiple_splitter" column="3" separator=","/> + <filter type="column_strip" column="3"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="3" name="lower" /> + <filter type="param_value" column="3" 
value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="3" strip="c"/> <!-- get rid of c's --> + <filter type="boolean" column="3" cast="int" /> - </option> - </action> - + <filter type="metadata_value" ref="input" name="chromCol" column="1" /> + <filter type="metadata_value" ref="input" name="startCol" column="2" /> + <filter type="metadata_value" ref="input" name="endCol" column="3" /> + </option> + </action> <conditional name="output"> - <when datatype_isinstance="interval"> - <action type="metadata" name="chromCol"> - <option type="from_param" name="list" column="0" offset="0"> <!-- chromCol is 0--> - <filter type="multiple_splitter" column="0" separator=","/> - <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> - <filter type="string_function" column="0" name="lower" /> - <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> - <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> - <filter type="insert_column" value="1" iterate="True" column="0"/> - <filter type="boolean" column="1" cast="int" /> - <filter type="metadata_value" ref="input" name="chromCol" column="1" /> - </option> - </action> - - <action type="metadata" name="startCol"> - <option type="from_param" name="list" column="0" offset="0"> <!-- startCol is 0--> - <filter type="multiple_splitter" column="0" separator=","/> - <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> - <filter type="string_function" column="0" name="lower" /> - <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> - <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> - <filter type="insert_column" value="1" iterate="True" column="0"/> - <filter type="boolean" column="1" cast="int" /> - <filter type="metadata_value" ref="input" name="startCol" column="1" /> - </option> - </action> - - <action type="metadata" 
name="endCol"> - <option type="from_param" name="list" column="0" offset="0"> <!-- endCol is 0--> - <filter type="multiple_splitter" column="0" separator=","/> - <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> - <filter type="string_function" column="0" name="lower" /> - <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> - <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> - <filter type="insert_column" value="1" iterate="True" column="0"/> - <filter type="boolean" column="1" cast="int" /> - <filter type="metadata_value" ref="input" name="endCol" column="1" /> - </option> - </action> - - <action type="metadata" name="nameCol" default="0"> - <option type="from_param" name="list" column="0" offset="0"> <!-- nameCol is 0--> - <filter type="multiple_splitter" column="0" separator=","/> - <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> - <filter type="string_function" column="0" name="lower" /> - <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> - <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> - <filter type="insert_column" value="1" iterate="True" column="0"/> - <filter type="boolean" column="1" cast="int" /> - <filter type="metadata_value" ref="input" name="nameCol" column="1" /> - </option> - </action> - - <action type="metadata" name="strandCol" default="0"> - <option type="from_param" name="list" column="0" offset="0"> <!-- strandCol is 0--> - <filter type="multiple_splitter" column="0" separator=","/> - <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> - <filter type="string_function" column="0" name="lower" /> - <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> - <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> - <filter type="insert_column" value="1" 
iterate="True" column="0"/> - <filter type="boolean" column="1" cast="int" /> - <filter type="metadata_value" ref="input" name="strandCol" column="1" /> - </option> - </action> + <when datatype_isinstance="interval"> + <action type="metadata" name="chromCol"> + <option type="from_param" name="list" column="0" offset="0"> <!-- chromCol is 0--> + <filter type="multiple_splitter" column="0" separator=","/> + <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="0" name="lower" /> + <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> + <filter type="insert_column" value="1" iterate="True" column="0"/> + <filter type="boolean" column="1" cast="int" /> + <filter type="metadata_value" ref="input" name="chromCol" column="1" /> + </option> + </action> + + <action type="metadata" name="startCol"> + <option type="from_param" name="list" column="0" offset="0"> <!-- startCol is 0--> + <filter type="multiple_splitter" column="0" separator=","/> + <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="0" name="lower" /> + <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> + <filter type="insert_column" value="1" iterate="True" column="0"/> + <filter type="boolean" column="1" cast="int" /> + <filter type="metadata_value" ref="input" name="startCol" column="1" /> + </option> + </action> + + <action type="metadata" name="endCol"> + <option type="from_param" name="list" column="0" offset="0"> <!-- endCol is 0--> + <filter type="multiple_splitter" column="0" separator=","/> + <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="0" name="lower" /> 
+ <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> + <filter type="insert_column" value="1" iterate="True" column="0"/> + <filter type="boolean" column="1" cast="int" /> + <filter type="metadata_value" ref="input" name="endCol" column="1" /> + </option> + </action> + + <action type="metadata" name="nameCol" default="0"> + <option type="from_param" name="list" column="0" offset="0"> <!-- nameCol is 0--> + <filter type="multiple_splitter" column="0" separator=","/> + <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="0" name="lower" /> + <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> + <filter type="insert_column" value="1" iterate="True" column="0"/> + <filter type="boolean" column="1" cast="int" /> + <filter type="metadata_value" ref="input" name="nameCol" column="1" /> + </option> + </action> + + <action type="metadata" name="strandCol" default="0"> + <option type="from_param" name="list" column="0" offset="0"> <!-- strandCol is 0--> + <filter type="multiple_splitter" column="0" separator=","/> + <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="0" name="lower" /> + <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> + <filter type="insert_column" value="1" iterate="True" column="0"/> + <filter type="boolean" column="1" cast="int" /> + <filter type="metadata_value" ref="input" name="strandCol" column="1" /> + </option> + </action> </when> </conditional> - </when> </conditional> </when> @@ -180,9 +197,8 @@ <output name="output" file="eq-cut.dat" /> </test> </tests> - 
<help> - +<![CDATA[ **What it does** This tool runs the **cut** unix command, which extract or delete columns from a file. @@ -229,5 +245,7 @@ ora ban +@REFERENCES@ +]]> </help> </tool>
--- a/easyjoin Sun Oct 06 08:22:36 2013 -0400 +++ b/easyjoin Wed Jan 07 11:10:52 2015 -0500 @@ -224,7 +224,7 @@ my ($input_filename, $output_filename, $key_column) = @_; my @SORT_COMMAND; - push @SORT_COMMAND, $HEADER ? "sort-header" : "sort" ; + push @SORT_COMMAND, $HEADER ? "./sort-header" : "sort" ; push @SORT_COMMAND, "-f" if $IGNORE_CASE; push @SORT_COMMAND, "-k${key_column},${key_column}" ; push @SORT_COMMAND, "--buffer-size", $SORT_BUFFER_SIZE if $SORT_BUFFER_SIZE;
--- a/easyjoin.xml Sun Oct 06 08:22:36 2013 -0400 +++ b/easyjoin.xml Wed Jan 07 11:10:52 2015 -0500 @@ -1,41 +1,51 @@ -<tool id="tp_easyjoin_tool" name="Join" version="0.1.1"> - <requirements> - <requirement type="package" version="8.21">gnu_coreutils</requirement> - </requirements> +<tool id="tp_easyjoin_tool" name="Join" version="@BASE_VERSION@.0"> <description>two files</description> - <command interpreter="perl">easyjoin $jointype - -t ' ' - $header - -e '$empty_string_filler' - -o auto - $ignore_case - -1 '$column1' - -2 '$column2' - "$input1" "$input2" - > '$output' + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"> + <requirement type="set_environment">TP_SCRIPT_PATH</requirement> + </expand> + <version_command>join --version | head -n 1</version_command> + <command> +<![CDATA[ + cp \$TP_SCRIPT_PATH/sort-header ./ && + chmod +x sort-header && + perl \$TP_SCRIPT_PATH/easyjoin + $jointype + -t ' ' + $header + -e '$empty_string_filler' + -o auto + $ignore_case + -1 '$column1' + -2 '$column2' + "$infile1" + "$infile2" + > '$output' +]]> </command> + <inputs> + <param name="infile1" format="tabular" type="data" label="1st file" /> + <param name="column1" label="Column to use from 1st file" type="data_column" data_ref="infile1" accept_default="true" /> - <inputs> - <param format="txt" name="input1" type="data" label="1st file" /> - <param name="column1" label="Column to use from 1st file" type="data_column" data_ref="input1" accept_default="true" /> - - <param format="txt" name="input2" type="data" label="2nd File" /> - <param name="column2" label="Column to use from 2nd file" type="data_column" data_ref="input2" accept_default="true" /> + <param name="infile2" format="txt" type="data" label="2nd File" /> + <param name="column2" label="Column to use from 2nd file" type="data_column" data_ref="infile2" accept_default="true" /> <param name="jointype" type="select" label="Output lines appearing in"> - <option value=" ">BOTH 1st & 2nd 
file.</option> - <option value="-v 1">1st but not in 2nd file. [-v 1]</option> - <option value="-v 2">2nd but not in 1st file. [-v 2]</option> - <option value="-a 1">both 1st & 2nd file, plus unpairable lines from 1st file. [-a 1]</option> - <option value="-a 2">both 1st & 2nd file, plus unpairable lines from 2st file. [-a 2]</option> - <option value="-a 1 -a 2">All Lines [-a 1 -a 2]</option> + <option value=" " selected="True">Both 1st & 2nd file.</option> + <option value="-v 1">1st but not in 2nd file. (-v 1)</option> + <option value="-v 2">2nd but not in 1st file. (-v 2)</option> + <option value="-a 1">Both 1st & 2nd file, plus unpairable lines from 1st file. (-a 1)</option> + <option value="-a 2">Both 1st & 2nd file, plus unpairable lines from 2st file. (-a 2)</option> + <option value="-a 1 -a 2">All lines [-a 1 -a 2]</option> <option value="-v 1 -v 2">All unpairable lines [-v 1 -v 2]</option> </param> - <param name="header" type="boolean" checked="false" truevalue="--header" falsevalue="" label="First line is a header line" help="Use if first line contains column headers. It will not be sorted." /> - - <param name="ignore_case" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Ignore case" help="Sort and Join key column values regardless of upper/lower case letters." /> - + <param name="header" type="boolean" checked="false" truevalue="--header" falsevalue="" + label="First line is a header line" help="Use if first line contains column headers. It will not be sorted." /> + <param name="ignore_case" type="boolean" checked="false" truevalue="-i" falsevalue="" + label="Ignore case" help="Sort and Join key column values regardless of upper/lower case letters." 
/> <param name="empty_string_filler" type="text" size="20" value="0" label="Value to put in unpaired (empty) fields"> <sanitizer> <valid initial="string.printable"> @@ -43,13 +53,23 @@ </valid> </sanitizer> </param> - - </inputs> - <outputs> - <data name="output" format="input" metadata_source="input1"/> - </outputs> - -<help> + </inputs> + <outputs> + <data name="output" format="input" metadata_source="infile1"/> + </outputs> + <tests> + <test> + <param name="infile1" value="easyjoin1.tabular" /> + <param name="column1" value="1" /> + <param name="infile2" value="easyjoin2.tabular" /> + <param name="column2" value="1" /> + <param name="header" value="True" /> + <param name="jointype" value="-a 1 -a 2" /> + <output name="output" file="easyjoin_result1.tabular" /> + </test> + </tests> + <help> +<![CDATA[ **What it does** This tool joins two tabular files based on a common key column. @@ -83,13 +103,13 @@ Melon green . Orange orange 7 -# Input files need not be sorted. -# The header line (**Fruit Color Price**) was joined and kept as first line. -# Missing values ( Avocado's color, missing from the first file ) are replaced with a period character. +.. class:: infomark ------ + * Input files need not be sorted. + * The header line (**Fruit Color Price**) was joined and kept as first line. + * Missing values ( Avocado's color, missing from the first file ) are replaced with a period character. -*easyjoin* was written by A. Gordon - -</help> +@REFERENCES@ +]]> + </help> </tool>
--- a/find_and_replace.xml Sun Oct 06 08:22:36 2013 -0400 +++ b/find_and_replace.xml Wed Jan 07 11:10:52 2015 -0500 @@ -1,30 +1,34 @@ -<tool id="tp_find_and_replace" name="Replace" version="0.1.1"> +<tool id="tp_find_and_replace" name="Replace" version="@BASE_VERSION@.0"> <description>parts of text</description> + <macros> + <import>macros.xml</import> + </macros> <command interpreter="perl"> +<![CDATA[ find_and_replace - #if $searchwhere.choice == "column": + #if $searchwhere.searchwhere_select == "column": -c $searchwhere.column #end if - -o $output + -o $outfile $caseinsensitive $wholewords $skip_first_line $is_regex - '$url_paste' - '$file_data' - '$input' + '$find_pattern' + '$replace_pattern' + '$infile' +]]> </command> <inputs> - <param format="txt" name="input" type="data" label="File to process" /> - <param name="url_paste" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without backslashes // ) " > + <param name="infile" format="txt" type="data" label="File to process" /> + <param name="find_pattern" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without backslashes // ) " > <sanitizer> <valid initial="string.printable"> <remove value="'"/> </valid> </sanitizer> </param> - - <param name="file_data" type="text" size="20" label="Replace with" help="Use simple text, or $& (dollar-ampersand) and $1 $2 $3 to refer to matched text. See examples below." > + <param name="replace_pattern" type="text" size="20" label="Replace with" help="Use simple text, or $& (dollar-ampersand) and $1 $2 $3 to refer to matched text. See examples below." > <sanitizer> <valid initial="string.printable"> <remove value="'"/> @@ -32,40 +36,59 @@ </sanitizer> </param> - <param name="is_regex" type="boolean" checked="false" truevalue="-r" falsevalue="" label="Find-Pattern is a regular expression" - help="see help section for details." 
/> + <param name="is_regex" type="boolean" checked="false" truevalue="-r" falsevalue="" + label="Find-Pattern is a regular expression" help="see help section for details." /> - <param name="caseinsensitive" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Case-Insensitive search" - help="" /> + <param name="caseinsensitive" type="boolean" checked="false" truevalue="-i" falsevalue="" + label="Case-Insensitive search" help="" /> - <param name="wholewords" type="boolean" checked="false" truevalue="-w" falsevalue="" label="find whole-words" - help="ignore partial matches (e.g. 'apple' will not match 'snapple') " /> + <param name="wholewords" type="boolean" checked="false" truevalue="-w" falsevalue="" + label="Find whole-words" help="ignore partial matches (e.g. 'apple' will not match 'snapple')" /> - <param name="skip_first_line" type="boolean" checked="false" truevalue="-s" falsevalue="" label="Ignore first line" - help="Select this option if the first line contains column headers. Text in the line will not be replaced. " /> + <param name="skip_first_line" type="boolean" checked="false" truevalue="-s" falsevalue="" + label="Ignore first line" help="Select this option if the first line contains column headers. Text in the line will not be replaced. 
" /> <conditional name="searchwhere"> - <param name="choice" type="select" label="Find and Replace text in"> + <param name="searchwhere_select" type="select" label="Find and Replace text in"> <option value="line" selected="true">entire line</option> <option value="column">specific column</option> </param> <when value="line" /> - <when value="column"> - <param name="column" label="in column" type="data_column" data_ref="input" accept_default="true" /> + <param name="column" label="in column" type="data_column" data_ref="infile" accept_default="true" /> </when> </conditional> </inputs> - <outputs> - <data format="input" name="output" metadata_source="input" /> + <data format="input" name="outfile" metadata_source="infile" /> </outputs> - -<help> - + <tests> + <test> + <param name="infile" value="find_and_replace1.txt" /> + <param name="find_pattern" value="day" /> + <param name="replace_pattern" value="great day" /> + <param name="is_regex" value="False" /> + <param name="caseinsensitive" value="False" /> + <param name="wholewords" value="True" /> + <output name="outfile" file="find_and_replace_results1.txt" /> + </test> + <test> + <param name="infile" value="find_and_replace2.txt" /> + <param name="find_pattern" value="^chr" /> + <param name="replace_pattern" value="" /> + <param name="is_regex" value="True" /> + <param name="caseinsensitive" value="False" /> + <param name="wholewords" value="False" /> + <param name="searchwhere_select" value="column" /> + <param name="column" value="3" /> + <output name="outfile" file="find_and_replace_results2.txt" /> + </test> + </tests> + <help> +<![CDATA[ **What it does** -This tool finds & replaces text in an input dataset. +This tool finds $ replaces text in an input dataset. .. class:: infomark @@ -93,7 +116,7 @@ **Examples of Replace Patterns** - **WORLD** The word 'WORLD' will be placed whereever the find pattern was found. 
-- **FOO-&-BAR** Each time the find pattern is found, it will be surrounded with 'FOO-' at the begining and '-BAR' at the end. **$&** (dollar-ampersand) represents the matched find pattern. +- **FOO-$&-BAR** Each time the find pattern is found, it will be surrounded with 'FOO-' at the begining and '-BAR' at the end. **$&** (dollar-ampersand) represents the matched find pattern. - **$1** The text which matched the first parenthesis in the Find Pattern. @@ -106,7 +129,7 @@ **Regular Expression:** no **Replace what:** entire line -Every time the word HELLO is found, it will be replaced with the word WORLD. +Every time the word HELLO is found, it will be replaced with the word WORLD. ----- @@ -124,7 +147,7 @@ **Perl's Regular Expression Syntax** -The Find & Replace tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern descibing a certain amount of text. +The Find & Replace tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern descibing a certain amount of text. - **( ) { } [ ] . * ? + \\ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for. - **^** matches the beginning of a string(but not an internal line). @@ -149,7 +172,7 @@ - **\\w** matches a single letter or digit or an underscore. - **\\s** matches a single white-space (space or tabs). - -</help> - +@REFERENCES@ +]]> + </help> </tool>
--- a/grep.xml Sun Oct 06 08:22:36 2013 -0400 +++ b/grep.xml Wed Jan 07 11:10:52 2015 -0500 @@ -1,21 +1,42 @@ -<tool id="tp_grep_tool" name="Search in textfiles" version="0.1.1"> +<tool id="tp_grep_tool" name="Search in textfiles" version="@BASE_VERSION@.0"> <description>(grep)</description> - <requirements> - <requirement type="package" version="8.21">gnu_coreutils</requirement> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"> <requirement type="package" version="2.14">gnu_grep</requirement> <requirement type="set_environment">TP_SCRIPT_PATH</requirement> - </requirements> + </expand> + <version_command>grep --version | head -n 1</version_command> <command> +<![CDATA[ #if str($color) == "COLOR": - GREP_COLOR='1;34' grep --color=always -P "$@" -- "${url_paste}" '${input}' | \$TP_SCRIPT_PATH/ansi2html.sh > "${output}" + GREP_COLOR='1;34' + grep + --color=always + -P + -A $lines_after + -B $lines_before + $invert + $case_sensitive + -- "${url_paste}" + '${infile}' | \$TP_SCRIPT_PATH/ansi2html.sh > "${output}" #else: - grep -P "$@" -- "${url_paste}" '${input}' | grep -v "^--$" > "${output}" + grep + -P + -A $lines_after + -B $lines_before + $invert + $case_sensitive + -- "${url_paste}" + '${infile}' | grep -v "^--$" > "${output}" #end if - ##grep_wrapper.sh '$input' '$output' '$url_paste' $color -A $lines_after -B $lines_before $invert $case_sensitive + ##grep_wrapper.sh '$infile' '$output' '$url_paste' $color -A $lines_after -B $lines_before $invert $case_sensitive +]]> </command> <inputs> - <param format="txt" name="input" type="data" label="Select lines from" /> + <param name="infile" format="txt" type="data" label="Select lines from" /> <param name="invert" type="select" label="that"> <option value="">Match</option> @@ -30,55 +51,54 @@ </sanitizer> </param> - <param name="case_sensitive" type="select" label="Match type"> + <param name="case_sensitive" type="select" label="Match type" help="(-i)"> <option value="-i">case 
insensitive</option> <option value="">case sensitive</option> </param> - - <param name="lines_before" type="integer" label="Show lines preceding the matched line (-B)" help="leave it at zero unless you know what you're doing" value="0" /> - <param name="lines_after" type="integer" label="Show lines trailing the matched line (-A)" help="leave it at zero unless you know what you're doing" value="0" /> - + <param name="lines_before" type="integer" value="0" + label="Show lines preceding the matched line" help="leave it at zero unless you know what you're doing. (-B)" /> + <param name="lines_after" type="integer" value="0" + label="Show lines trailing the matched line" help="leave it at zero unless you know what you're doing. (-A)" /> <param name="color" type="select" label="Output"> <option value="NOCOLOR">text file (for further processing)</option> <option value="COLOR">Highlighted HTML (for easier viewing)</option> </param> - </inputs> - <outputs> - <data format="input" name="output" metadata_source="input"> - <change_format> - <when input="color" value="COLOR" format="html" - /> - </change_format> - </data> - </outputs> - <tests> - <test> - <!-- grep a FASTA file for sequences with specific motif --> - <param name="input" value="unix_grep_input1.txt" /> - <output name="output" file="unix_grep_output1.txt" /> - <param name="case_sensitive" value="case sensitive" /> - <param name="invert" value="" /> - <param name="url_paste" value="AA.{2}GT" /> - <param name="lines_before" value="1" /> - <param name="lines_after" value="0" /> - <param name="color" value="NOCOLOR" /> - </test> - <test> - <!-- grep a FASTA file for sequences with specific motif - + </inputs> + <outputs> + <data format="input" name="output" metadata_source="infile"> + <change_format> + <when input="color" value="COLOR" format="html"/> + </change_format> + </data> + </outputs> + <tests> + <test> + <!-- grep a FASTA file for sequences with specific motif --> + <param name="infile" 
value="unix_grep_input1.txt" /> + <param name="case_sensitive" value="case sensitive" /> + <param name="invert" value="" /> + <param name="url_paste" value="AA.{2}GT" /> + <param name="lines_before" value="1" /> + <param name="lines_after" value="0" /> + <param name="color" value="NOCOLOR" /> + <output name="output" file="unix_grep_output1.txt" /> + </test> + <test> + <!-- grep a FASTA file for sequences with specific motif - show highlighed output --> - <param name="input" value="unix_grep_input1.txt" /> - <output name="output" file="unix_grep_output2.html" /> - <param name="case_sensitive" value="case sensitive" /> - <param name="invert" value="" /> - <param name="url_paste" value="AA.{2}GT" /> - <param name="lines_before" value="0" /> - <param name="lines_after" value="0" /> - <param name="color" value="COLOR" /> - </test> - </tests> -<help> - + <param name="infile" value="unix_grep_input1.txt" /> + <param name="case_sensitive" value="case sensitive" /> + <param name="invert" value="" /> + <param name="url_paste" value="AA.{2}GT" /> + <param name="lines_before" value="0" /> + <param name="lines_after" value="0" /> + <param name="color" value="COLOR" /> + <output name="output" file="unix_grep_output2.html" /> + </test> + </tests> + <help> +<![CDATA[ **What it does** This tool runs the unix **grep** command on the selected data file. @@ -139,6 +159,7 @@ - **$** matches the end of a line or string. - **\|** Separates alternate possibilities. - -</help> +@REFERENCES@ +]]> + </help> </tool>
--- a/head.xml Sun Oct 06 08:22:36 2013 -0400 +++ b/head.xml Wed Jan 07 11:10:52 2015 -0500 @@ -1,29 +1,28 @@ -<tool id="tp_head_tool" name="Select first" version="0.1.1"> +<tool id="tp_head_tool" name="Select first" version="@BASE_VERSION@.0"> <description>lines from a dataset (head)</description> - <requirements> - <requirement type="package" version="8.21">gnu_coreutils</requirement> - </requirements> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <version_command>head --version | head -n 1</version_command> <command> - head --lines $complement$count '${infile}' > '${outfile}' +<![CDATA[ + head + --lines + $complement$count + '${infile}' + > '${outfile}' +]]> </command> - <inputs> - <param format="txt" name="infile" type="data" label="File to select" /> - + <param name="infile" type="data" format="txt" label="File to select" /> <param name="complement" type="select" label="Operation"> <option value="">Keep first lines</option> <option value="-">Remove last lines</option> </param> - - <param name="count" type="integer" size="5" value="10" label="Number of lines" help="These will be kept/discarded (depending on 'operation')."> - <sanitizer> - <valid initial="string.printable"> - <remove value="'"/> - </valid> - </sanitizer> - </param> + <param name="count" type="integer" size="5" value="10" + label="Number of lines" help="These will be kept/discarded (depending on 'operation'). 
(--lines)" /> </inputs> - <outputs> <data format="input" name="outfile" metadata_source="infile"/> </outputs> @@ -31,11 +30,11 @@ <test> <param name="count" value="10"/> <param name="infile" value="1.bed"/> - <output name="out_file1" file="eq-showbeginning.dat"/> + <output name="outfile" file="head_results1.bed"/> </test> </tests> <help> - +<![CDATA[ **What it does** This tool outputs specified number of lines from the **beginning** of a dataset @@ -57,6 +56,7 @@ chr7 56632 56652 D17003_CTCF_R6 310 + chr7 56736 56756 D17003_CTCF_R7 354 + - +@REFERENCES@ +]]> </help> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,37 @@ +<macros> + <xml name="requirements"> + <requirements> + <requirement type="package" version="8.22">gnu_coreutils</requirement> + <yield/> + </requirements> + </xml> + <token name="@BASE_VERSION@">1.0</token> + <xml name="stdio"> + <stdio> + <!-- Anything other than zero is an error --> + <exit_code range="1:" /> + <exit_code range=":-1" /> + <!-- In case the return code has not been set properly, check stderr too --> + <regex match="Error:" /> + <regex match="Exception:" /> + </stdio> + </xml> + <token name="@REFERENCES@"> +<![CDATA[ +------ + +**Citation** + +If you use this tool in Galaxy, please cite: + +Bjoern A. Gruening (2014), `Galaxy wrapper <https://github.com/bgruening/galaxytools>`_ + +Assaf Gordon (gordon <at> cshl dot edu) +]]> + </token> + <xml name="citations"> + <citations> + <yield /> + </citations> + </xml> +</macros>
--- a/multijoin.xml Sun Oct 06 08:22:36 2013 -0400 +++ b/multijoin.xml Wed Jan 07 11:10:52 2015 -0500 @@ -1,49 +1,64 @@ -<tool id="tp_multijoin'_tool" name="Multi-Join" version="0.1.1"> - <description>(combine multiple files)</description> - <command interpreter="perl">multijoin +<tool id="tp_multijoin'_tool" name="Multi-Join" version="@BASE_VERSION@.0"> + <description>(combine multiple files)</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"> + <requirement type="package" version="5.18">perl</requirement> + <requirement type="package" version="1.0">text_processing_perl_packages</requirement> + </expand> + <command interpreter="perl"> +<![CDATA[ + multijoin --key '$key_column' --values '$value_columns' --filler '$filler' $ignore_dups $output_header $input_header - $first_filename - #for $file in $files - '$file.filename' + '$first_file' + #for $file in $files: + '$file' #end for - > '$output' - </command> + > '$outfile' +]]> + </command> + <inputs> + <param name="first_file" type="data" format="txt" label="File to join"/> + <param name="files" multiple="True" type="data" format="txt" label="add additional file" /> - <inputs> + <param name="key_column" label="Common key column" type="integer" + value="1" help="Usually gene-ID or other common value" /> - <param name="first_filename" label="File to join" type="data" format="txt" /> - <repeat name="files" title="with" min="2"> - <param name="filename" label="add additional file" type="data" format="txt" /> - </repeat> - - <param name="key_column" label="Common key column" type="integer" - value="1" help="Usually gene-ID or other common value" /> + <param name="value_columns" label="Column with values to preserve" + type="data_column" data_ref="first_file" accept_default="true" multiple="True" display="checkboxes"/> - <param name="value_columns" label="Column with values to preserve" - type="data_column" data_ref="first_filename" accept_default="true" multiple="True" 
display="checkboxes"/> - - <param name="output_header" type="boolean" checked="false" truevalue="--out-header" falsevalue="" label="Add header line to the output file" help="" /> - <param name="input_header" type="boolean" checked="false" truevalue="--in-header" falsevalue="" label="Input files contain a header line (as first line)" help="" /> - <param name="ignore_dups" type="boolean" checked="false" truevalue="--ignore-dups" falsevalue="" label="Ignore duplicated keys" help="If not set, duplicated keys in the same file will cause an error." /> - <param name="filler" type="text" size="20" value="0" label="Value to put in unpaired (empty) fields"> - <sanitizer> - <valid initial="string.printable"> - <remove value="'"/> - </valid> - </sanitizer> - </param> - - </inputs> - <outputs> - <data name="output" format="input" metadata_source="input" /> - </outputs> - -<help> + <param name="output_header" type="boolean" checked="false" truevalue="--out-header" falsevalue="" label="Add header line to the output file" help="" /> + <param name="input_header" type="boolean" checked="false" truevalue="--in-header" falsevalue="" label="Input files contain a header line (as first line)" help="" /> + <param name="ignore_dups" type="boolean" checked="false" truevalue="--ignore-dups" falsevalue="" label="Ignore duplicated keys" help="If not set, duplicated keys in the same file will cause an error." 
/> + <param name="filler" type="text" size="20" value="0" label="Value to put in unpaired (empty) fields"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </inputs> + <outputs> + <data name="outfile" format="input" metadata_source="first_file" /> + </outputs> + <tests> + <test> + <param name="first_file" value="multijoin1.txt" /> + <param name="files" value="multijoin2.txt,multijoin3.txt" /> + <param name="key_column" value="4" /> + <param name="value_columns" value="c7,c8,c9" /> + <param name="output_header" value="True" /> + <output name="outfile" file="multijoin_result1.txt" /> + </test> + </tests> + <help> +<![CDATA[ **What it does** This tool joins multiple tabular files based on a common key column. @@ -108,12 +123,11 @@ FBtr0300796 0 0 0 56 1296 14475 0 0 0 ... +.. class:: infomark -# Input files need not be sorted. - ------ +Input files need not be sorted. -*multijoin* was written by A. Gordon (gordon at cshl dot edu) - -</help> +@REFERENCES@ +]]> + </help> </tool>
--- a/readme.rst Sun Oct 06 08:22:36 2013 -0400 +++ b/readme.rst Wed Jan 07 11:10:52 2015 -0500 @@ -2,7 +2,9 @@ ===================================================== The initial work was done by Assaf Gordon and Greg Hannon's lab ( http://hannonlab.cshl.edu ) -in Cold Spring Harbor Laboratory ( http://www.cshl.edu ). +in Cold Spring Harbor Laboratory ( http://www.cshl.edu ). In late 2013 maintenance and +further development were taken over by Bjoern Gruening. Feel free to contribute any general-purpose +text manipulation tool to this repository. Tools: @@ -33,7 +35,7 @@ Requirements ------------ -1. Coreutils vesion 8.19 or later. +1. Coreutils version 8.22 or later. 2. AWK version 4.0.1 or later. 3. SED version 4.2 *with* a special patch 4. Grep with PCRE support @@ -82,9 +84,7 @@ TODO ---- -- unit-tests -- uniqu will get a new --group funciton with the 8.22 release, its currently commended out -- also shuf will get a major improved performance with large files http://git.savannah.gnu.org/gitweb/?p=coreutils.git;a=commit;h=20d7bce0f7e57d9a98f0ee811e31c757e9fedfff +- add shuf; then we can remove the random feature from sort and use shuf instead - move some advanced settings under a conditional, for example the cut tools offers to cut bytes - cut wrapper has some output conditional magic for interval files, that needs to be checked @@ -97,7 +97,7 @@ ------- * Copyright (c) 2009-2013 A. Gordon (gordon <at> cshl dot edu) -* Copyright (c) 2013 B. Gruening (bjoern dot gruening <at> gmail dot com) +* Copyright (c) 2013-2015 B. Gruening (bjoern dot gruening <at> gmail dot com) Permission is hereby granted, free of charge, to any person obtaining
--- a/remove_ending.xml Sun Oct 06 08:22:36 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,46 +0,0 @@ -<tool id="tp_remove_ending" name="Remove ending" version="0.1"> - <description>of a file</description> - <requirements> - <requirement type="package" version="8.21">gnu_coreutils</requirement> - </requirements> - <command interpreter="sh">tail -n -$num_lines $infile $outfile</command> - <inputs> - <param name="num_lines" size="5" type="integer" value="1" label="Remove last n lines" help=""/> - <param format="txt" name="input" type="data" label="from"/> - </inputs> - <tests> - <test> - <param name="infile" value="remove_ending_input1.txt" /> - <output name="out_file1" file="remove_ending_output1.txt" /> - <param name="num_lines" value="2" /> - </test> - </tests> - <outputs> - <data format="input" name="outfile" metadata_source="input"/> - </outputs> - <help> - -**What it does** - -This tool removes specified number of lines from the ending of a dataset - ------ - -**Example** - -Input File:: - - chr7 56632 56652 D17003_CTCF_R6 310 + - chr7 56736 56756 D17003_CTCF_R7 354 + - chr7 56761 56781 D17003_CTCF_R4 220 + - chr7 56772 56792 D17003_CTCF_R7 372 + - chr7 56775 56795 D17003_CTCF_R4 207 + - -After removing the last 2 lines the dataset will look like this:: - - chr7 56632 56652 D17003_CTCF_R6 310 + - chr7 56736 56756 D17003_CTCF_R7 354 + - chr7 56761 56781 D17003_CTCF_R4 220 + - - </help> -</tool>
--- a/replace_text_in_column.xml Sun Oct 06 08:22:36 2013 -0400 +++ b/replace_text_in_column.xml Wed Jan 07 11:10:52 2015 -0500 @@ -1,16 +1,27 @@ -<tool id="tp_replace_in_column" name="Replace Text" version="0.1"> +<tool id="tp_replace_in_column" name="Replace Text" version="@BASE_VERSION@.0"> <description>in a specific column</description> - <requirements> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"> <requirement type="package" version="4.1.0">gnu_awk</requirement> - </requirements> + </expand> + <version_command>awk --version | head -n 1</version_command> <command interpreter="sh"> - #adapt to awk's quirks - to pass an acutal backslash - two backslashes are required (just like in a C string) +<![CDATA[ + ##adapt to awk's quirks - to pass an acutal backslash - two backslashes are required (just like in a C string) REPLACE_PATTERN=\${$replace_pattern//\\/\\\\}; - awk -v OFS="\t" --re-interval --sandbox "{ \$$column = gensub( /$find_pattern/, \"$replace_pattern\", \"g\", \$$column ) ; print \$0 ; }" "$input" > "$output" + awk + -v OFS="\t" + --re-interval + --sandbox "{ \$$column = gensub( /$find_pattern/, \"$replace_pattern\", \"g\", \$$column ) ; print \$0 ; }" + "$infile" + > "$output" +]]> </command> <inputs> - <param format="tabular" name="input" type="data" label="File to process" /> - <param name="column" label="in column" type="data_column" data_ref="input" accept_default="true" /> + <param format="tabular" name="infile" type="data" label="File to process" /> + <param name="column" label="in column" type="data_column" data_ref="infile" accept_default="true" /> <param name="find_pattern" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without backslashes // ) " > <sanitizer> @@ -19,7 +30,6 @@ </valid> </sanitizer> </param> - <param name="replace_pattern" type="text" size="20" label="Replace with" help="Use simple text, or & (ampersand) and \\1 \\2 \\3 to refer to matched 
text. See examples below." > <sanitizer> <valid initial="string.printable"> @@ -27,22 +37,21 @@ </valid> </sanitizer> </param> - </inputs> + <outputs> + <data format="input" name="output" metadata_source="infile" /> + </outputs> <tests> <test> - <param name="input" value="replace_text_in_column_in1.txt" ftype="tabular" /> + <param name="infile" value="replace_text_in_column_in1.txt" ftype="tabular" /> + <param name="column" value="4" /> + <param name="find_pattern" value=".+_(R.)" /> + <param name="replace_pattern" value="\1" /> <output name="output" file="replace_text_in_column_output1.txt" /> - <param name="column" value="4" /> - <param name="url_paste" value=".+_(R.)" /> - <param name="file_data" value="\1" /> </test> </tests> - <outputs> - <data format="input" name="output" metadata_source="input" /> - </outputs> <help> - +<![CDATA[ **What it does** This tool performs find & replace operation on a specified column in a given file. @@ -74,8 +83,6 @@ - **\\1** The text which matched the first parenthesis in the Find Pattern. - - ----- **Example 1** @@ -124,5 +131,7 @@ **Note**: AWK uses extended regular expression syntax, not Perl syntax. **\\d**, **\\w**, **\\s** etc. are **not** supported. +@REFERENCES@ +]]> </help> </tool>
--- a/replace_text_in_line.xml Sun Oct 06 08:22:36 2013 -0400 +++ b/replace_text_in_line.xml Wed Jan 07 11:10:52 2015 -0500 @@ -1,16 +1,24 @@ -<tool id="tp_replace_in_line" name="Replace Text" version="0.1"> +<tool id="tp_replace_in_line" name="Replace Text" version="@BASE_VERSION@.0"> <description>in entire line</description> - <requirements> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"> <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement> - </requirements> - + </expand> + <version_command>sed --version | head -n 1</version_command> <command interpreter="sh"> - sed -r --sandbox "s/$find_pattern/$replace_pattern/g" "$input" > "$output" +<![CDATA[ + sed + -r + --sandbox + "s/$find_pattern/$replace_pattern/g" + "$input" + > "$output" +]]> </command> - <inputs> <param format="txt" name="input" type="data" label="File to process" /> - <param name="find_pattern" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without backslashes // ) " > <sanitizer> <valid initial="string.printable"> @@ -18,7 +26,6 @@ </valid> </sanitizer> </param> - <param name="replace_pattern" type="text" size="20" label="Replace with:" help="Use simple text, or & (ampersand) and \\1 \\2 \\3 to refer to matched text. See examples below." > <sanitizer> <valid initial="string.printable"> @@ -26,21 +33,20 @@ </valid> </sanitizer> </param> - </inputs> - <tests> - <test> - <param name="input" value="replace_text_in_line_in1.txt" ftype="tabular" /> - <output name="output" file="replace_text_in_line_output1.txt" /> - <param name="url_paste" value="CTC." /> - <param name="file_data" value="FOOBAR" /> - </test> - </tests> <outputs> <data format="input" name="output" metadata_source="input"/> </outputs> + <tests> + <test> + <param name="input" value="replace_text_in_line_in1.txt" /> + <param name="find_pattern" value="CTC." 
/> + <param name="replace_pattern" value="FOOBAR" /> + <output name="output" file="replace_text_in_line_output1.txt" /> + </test> + </tests> <help> - +<![CDATA[ **What it does** This tool performs find & replace operation on a specified file. @@ -124,5 +130,7 @@ **Note**: SED uses extended regular expression syntax, not Perl syntax. **\\d**, **\\w**, **\\s** etc. are **not** supported. +@REFERENCES@ +]]> </help> </tool>
--- a/sed.xml Sun Oct 06 08:22:36 2013 -0400 +++ b/sed.xml Wed Jan 07 11:10:52 2015 -0500 @@ -1,19 +1,27 @@ -<tool id="tp_sed_tool" name="Text transformation" version="0.1.1"> - <description>with sed</description> - <requirements> +<tool id="tp_sed_tool" name="Text transformation" version="@BASE_VERSION@.0"> + <description>with sed</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"> <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement> - </requirements> + </expand> + <version_command>sed --version | head -n 1</version_command> <command> - sed --sandbox -r - - #if $adv_opts.adv_opts_selector == 'advanced': - $adv_opts.silent - #end if - -f '$sed_script' '$input' > '$output' +<![CDATA[ + sed + --sandbox + -r + #if $adv_opts.adv_opts_selector == 'advanced': + $adv_opts.silent + #end if + -f '$sed_script' + '$infile' + > '$output' +]]> </command> <inputs> - <param format="txt" name="input" type="data" label="File to process" /> - + <param format="txt" name="infile" type="data" label="File to process" /> <param name="url_paste" type="text" area="true" size="5x35" label="SED Program" help=""> <sanitizer> <valid initial="string.printable"> @@ -21,7 +29,6 @@ </valid> </sanitizer> </param> - <conditional name="adv_opts"> <param name="adv_opts_selector" type="select" label="Advanced Options"> <option value="basic" selected="True">Hide Advanced Options</option> @@ -35,32 +42,31 @@ </param> </when> </conditional> - - </inputs> + </inputs> <configfiles> - <configfile name="sed_script"> + <configfile name="sed_script"> $url_paste - </configfile> - </configfiles> - <tests> - <test> - <param name="input" value="unix_sed_input1.txt" /> - <output name="output" file="unix_sed_output1.txt" /> - <param name="url_paste" value="1d ; s/foo/bar/" /> - <param name="silent" value="" /> - </test> - <test> - <param name="input" value="unix_sed_input1.txt" /> - <output name="output" file="unix_sed_output2.txt" /> - <param 
name="url_paste" value="/foo/ { s/foo/baz/g ; p }" /> - <param name="silent" value="silent" /> - </test> - </tests> - <outputs> - <data format="input" name="output" metadata_source="input" /> - </outputs> -<help> - + </configfile> + </configfiles> + <outputs> + <data format="input" name="output" metadata_source="infile" /> + </outputs> + <tests> + <test> + <param name="infile" value="unix_sed_input1.txt" /> + <param name="url_paste" value="1d ; s/foo/bar/" /> + <param name="silent" value="" /> + <output name="output" file="unix_sed_output1.txt" /> + </test> + <test> + <param name="infile" value="unix_sed_input1.txt" /> + <param name="url_paste" value="/foo/ { s/foo/baz/g ; p }" /> + <param name="silent" value="silent" /> + <output name="output" file="unix_sed_output2.txt" /> + </test> + </tests> + <help> +<![CDATA[ **What it does** This tool runs the unix **sed** command on the selected data file. @@ -120,5 +126,7 @@ **Note**: SED uses extended regular expression syntax, not Perl syntax. **\\d**, **\\w**, **\\s** etc. are **not** supported. +@REFERENCES@ +]]> </help> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sort-header Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,281 @@ +#!/usr/bin/env perl +## +## Sort-header - wrapper for GNU sort with header-line support +## +## Copyright(C) A. Gordon +## license AGPLv3+ +## +use strict; +use warnings; +use Data::Dumper; +use IO::Handle; +use Getopt::Long qw(:config bundling no_ignore_case_always); + +## Forward declarations +sub add_standard_sort_param(@); +sub add_standard_sort_param_value(@); +sub forbidden_sort_param(@); +sub show_help(); +sub show_version(); +sub show_examples(); +sub parse_commandline_options(); +sub reassign_input_output(); +sub process_header_lines(); +sub run_sort(); +sub read_line_non_buffered(); + + +## +## Runtime options +## +my $PROGRAM="sort-header"; +my $VERSION=0.4; + +my $check_only=undef; +my $input_file=undef; +my $output_file=undef; +my $field_separator=undef; +my $header_lines =1 ; +my $debug=undef; +my $sort_exit_code=1; #by default, assume some error + +my @sort_options; + +## +## Program Start +## +parse_commandline_options(); +reassign_input_output(); +process_header_lines(); +run_sort(); +exit($sort_exit_code); +## +## Program End +## + +sub show_examples() +{ +print<<EOF; +Sorting a file with a header line: + +\$ cat input.txt +Fruit Color Price +Banana Yellow 4.1 +Avocado Green 8.0 +Apple Red 3.0 +Melon Green 6.1 + +# By default, 'sort-header' assumes 1 header line +# (no need to use --header in this case). + +\$ sort-header -k3,3nr input.txt +Fruit Color Price +Avocado Green 8.0 +Melon Green 6.1 +Banana Yellow 4.1 +Apple Red 3.0 + +EOF + exit(0); +} + +sub show_help() +{ +print<<EOF; +${PROGRAM}: Wrapper for GNU sort, allowing sorting files with header lines. + +Usage: $PROGRAM [HEADER-OPTIONS] [GNU sort Options] [INPUT-FILE] + +HEADER-OPTIONS: the following options are supported by '${PROGRAM}': + + --header N = Treat the first N lines as header lines. + These line will NOT be sorted. They will be passed + directly to the output file. 
(default: 1) + + --version = Print ${PROGRAM}'s version. + + --debugheader = Print debug messages (relating to ${PROGRAM}'s operation). + + --help = Show this help screen. + + --examples = Show usage examples. + +GNU sort options: + Most of the standard GNU sort options are supported and passed to GNU sort. + The following options can not be used with '${PROGRAM}': + + -m --merge => ${PROGRAM} can only sort one file, not merge multiple files. + -c -C --check => Currently not supported + --files0-from => Currently not supported + -z --zero-terminated => Currently not supported + +INPUT-FILE: + If INPUT-FILE is not specified, $PROGRAM will use STDIN (just like GNU sort). + +EOF + exit(0); +} + +sub show_version() +{ +print<<EOF; +$PROGRAM $VERSION +Copyright (C) 2010 A. Gordon (gordon\@cshl.edu) +License AGPLv3+: Affero GPL version 3 or later (http://www.gnu.org/licenses/agpl.html) + +To see the GNU's sort version, run: + sort --version +EOF + exit(0); +} + +sub parse_commandline_options() +{ + my $rc = GetOptions( + "ignore-leading-blanks|b" => \&add_standard_sort_param, + "dictionary-order|d" => \&add_standard_sort_param, + "ignore-case|f" => \&add_standard_sort_param, + "general-numeric-sort|g" => \&add_standard_sort_param, + "ignore-nonprinting|i" => \&add_standard_sort_param, + "month-sort|M" => \&add_standard_sort_param, + "human-numeric-sort|h" => \&add_standard_sort_param, + "numeric-sort|n" => \&add_standard_sort_param, + "random-source=s" => \&add_standard_sort_param_value, + "random-sort|R" => \&add_standard_sort_param, + "reverse|r" => \&add_standard_sort_param, + "sort=s" => \&add_standard_sort_param_value, + "version-sort|V" => \&add_standard_sort_param, + + "check|c" => \&forbidden_sort_param, + "C" => \&forbidden_sort_param, + "compress-program=s" => \&add_standard_sort_param_value, + "debug" => \&add_standard_sort_param, + + "files0-from=s" => \&forbidden_sort_param, + + "key|k=s" => \&add_standard_sort_param_value, + "merge|m" => 
\&forbidden_sort_param, + "batch-size=i" => \&forbidden_sort_param, + + "parallel=i" => \&add_standard_sort_param_value, + + "output|o=s" => \$output_file, + + "stable|s" => \&add_standard_sort_param, + "buffer-size|S=s" => \&add_standard_sort_param_value, + + "field-separator|t=s" => \&add_standard_sort_param_value, + "temporary-directory|T=s" => \&add_standard_sort_param_value, + "unique|u" => \&add_standard_sort_param, + + "zero-terminated|z" => \&forbidden_sort_param, + + "help" => \&show_help, + "version" => \&show_version, + "examples" => \&show_examples, + + "header=i" => \$header_lines, + "debugheader" => \$debug, + ); + + exit 1 unless $rc; + + my @INPUT_FILES = @ARGV; + + die "$PROGRAM: error: invalid number of header lines ($header_lines)\n" unless $header_lines>=0; + die "$PROGRAM: error: Multiple input files specified. This program can sort only a signle file.\n" if (scalar(@INPUT_FILES)>1); + $input_file = shift @INPUT_FILES if scalar(@INPUT_FILES)==1; + + if ($debug) { + warn "$PROGRAM: number of header lines = $header_lines\n"; + warn "$PROGRAM: PASS-to-Sort options:\n", Dumper(\@sort_options), "\n"; + } +} + +sub reassign_input_output() +{ + if ($output_file) { + warn "$PROGRAM: Re-assigning STDOUT to '$output_file'\n" if $debug; + open OUTPUT, '>', $output_file or die "$PROGRAM: Error: failed to create output file '$output_file': $!\n"; + STDOUT->fdopen(\*OUTPUT, 'w') or die "$PROGRAM: Error: failed to reassign STDOUT to '$output_file': $!\n"; + } + + + if ($input_file) { + warn "$PROGRAM: Re-assigning STDIN to '$input_file'\n" if $debug; + open INPUT, '<', $input_file or die "$PROGRAM: Error: failed to open input file '$input_file': $!\n"; + STDIN->fdopen(\*INPUT, 'r') or die "$PROGRAM: Error: failed to reassign STDIN to '$input_file': $!\n"; + } +} + +sub process_header_lines() +{ + warn "$PROGRAM: Reading $header_lines header lines...\n" if $debug; + for (my $i=0; $i<$header_lines; $i++) { + my $line = read_line_non_buffered(); + exit unless 
defined $line; + print $line; + } +} + +sub run_sort() +{ + warn "$PROGRAM: Running GNU sort...\n" if $debug; + system('sort', @sort_options); + if ($? == -1) { + die "$PROGRAM: Error: failed to execute 'sort': $!\n"; + } + elsif ($? & 127) { + my $signal = ($? & 127); + kill 2, $$ if $signal == 2; ##if sort was interrupted (CTRL-C) - just pass it on and commit suicide + die "$PROGRAM: Error: 'sort' child-process died with signal $signal\n"; + } + else { + $sort_exit_code = ($? >> 8); + } +} + + +sub add_standard_sort_param(@) +{ + my ($obj)= @_; + add_standard_sort_param_value($obj, undef); +} + +sub add_standard_sort_param_value(@) +{ + my ($obj,$value)= @_; + + my $option = "" . $obj ; #stringify the option object, get the option name. + + if (length($option)==1) { + $option = "-" . $option ; + } else { + $option = "--" . $option ; + } + push @sort_options, $option ; + push @sort_options, $value if defined $value; #only undef means "flag without argument"; a value of "0" (e.g. -t 0) must still be passed on +} + +sub forbidden_sort_param(@) +{ + my ($obj,$value)= @_; + my $option = "" . $obj ; #stringify the option object, get the option name. + + die "$PROGRAM: Error: option '$option' can not be used with this program. If you must use it, run GNU sort directly. see --help for more details.\n"; +} + +sub read_line_non_buffered() +{ + my $line = ''; + while ( 1 ) { + my $c; + my $rc = sysread STDIN, $c, 1; + die "$PROGRAM: STDIN Read error: $!" unless defined $rc; + return $line if $rc==0 && length($line); #EOF after an unterminated last line - return it, even if it is just "0" + return undef if $rc==0; #EOF with nothing read + $line .= $c ; + return $line if ( $c eq "\n"); + } +} +
--- a/sort.xml Sun Oct 06 08:22:36 2013 -0400 +++ b/sort.xml Wed Jan 07 11:10:52 2015 -0500 @@ -1,32 +1,32 @@ -<tool id="tp_sort_header_tool" name="Sort" version="0.1.1"> +<tool id="tp_sort_header_tool" name="Sort" version="@BASE_VERSION@.0"> <description>data in ascending or descending order</description> - <requirements> - <requirement type="package" version="8.21">gnu_coreutils</requirement> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"> <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement> - </requirements> - <command interpreter="sh"> - #if int($header) > 0: - (sed -u '${header}'q && sort $unique $ignore_case --stable -t ' ' - - #for $key in $sortkeys - '-k ${key.column}${key.order}${key.style},${key.column}' - #end for + </expand> + <version_command>sort --version | head -n 1</version_command> + <command> +<![CDATA[ + ( + #if int($header) > 0: + sed -u '${header}'q && + #end if - ) < '${infile}' > '${outfile}' - #else: - (sort $unique $ignore_case --stable -t ' ' + sort $unique $ignore_case --stable -t ' ' - #for $key in $sortkeys - '-k ${key.column}${key.order}${key.style},${key.column}' - #end for + #for $key in $sortkeys: + -k '${key.column}${key.order}${key.style},${key.column}' + #end for - ) < '${infile}' > '${outfile}' - #end if + ) < '${infile}' > '${outfile}' +]]> </command> - <inputs> - <param format="txt" name="infile" type="data" label="Sort Query" /> - <param name="header" type="integer" size="5" value="0" label="Number of header lines" help="These will be ignored during sort."> + <param format="tabular" name="infile" type="data" label="Sort Query" /> + <param name="header" type="integer" size="5" value="0" + label="Number of header lines" help="These will be ignored during sort."> <validator type="in_range" message="Negative values are not allowed." 
min="0"/> </param> @@ -37,56 +37,67 @@ <option value="r">Descending order</option> </param> <param name="style" type="select" display="radio" label="Flavor"> - <option value="n">Fast numeric sort ([-n])</option> - <option value="g">General numeric sort ( scientific notation [-g])</option> - <option value="V">Natural/Version sort ([-V]) </option> + <option value="n">Fast numeric sort (-n)</option> + <option value="g">General numeric sort ( scientific notation -g)</option> + <option value="V">Natural/Version sort (-V) </option> <option value="">Alphabetical sort</option> <option value="h">Human-readable numbers (-h)</option> - <option value="R">Random order</option> + <option value="R">Random order (-R)</option> </param> </repeat> <param name="unique" type="boolean" checked="false" truevalue="--unique" falsevalue="" - label="Output unique values" help="Print only unique values (based on sorted key columns. See help section for details." /> - - <param name="ignore_case" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Ignore case" help="Sort and Join key column values regardless of upper/lower case letters." /> + label="Output unique values" help="Print only unique values, based on sorted key columns. See help section for details. (--unique)" /> + <param name="ignore_case" type="boolean" checked="false" truevalue="-f" falsevalue="" + label="Ignore case" help="Sort and Join key column values regardless of upper/lower case letters. 
(-f)" /> </inputs> <outputs> <data format="input" name="outfile" metadata_source="infile"/> </outputs> <tests> - <!-- anyone knows how to write tests with repeat tags --> <test> <param name="infile" value="sort_in1.bed"/> - <param name="column" value="1"/> - <param name="style" value=""/> - <param name="order" value="ASC"/> - <param name="other_column" value="3"/> - <param name="other_style" value="n"/> - <param name="other_order" value="r"/> - <output name="out_file1" file="sort_out1.bed"/> + <param name="header" value="3"/> + <repeat name="sortkeys"> + <param name="column" value="1"/> + <param name="style" value=""/> + <param name="order" value=""/> + </repeat> + <repeat name="sortkeys"> + <param name="column" value="3"/> + <param name="style" value="n"/> + <param name="order" value="r"/> + </repeat> + <output name="outfile" file="sort_result1.bed"/> </test> <test> <param name="infile" value="sort_in1.bed"/> - <param name="column" value="1"/> - <param name="style" value=""/> - <param name="order" value="ASC"/> - <param name="other_column" value="3"/> - <param name="other_style" value="n"/> - <param name="other_order" value=""/> - <output name="out_file1" file="sort_out2.bed"/> + <param name="header" value="3"/> + <repeat name="sortkeys"> + <param name="column" value="1"/> + <param name="style" value=""/> + <param name="order" value=""/> + </repeat> + <repeat name="sortkeys"> + <param name="column" value="3"/> + <param name="style" value="n"/> + <param name="order" value=""/> + </repeat> + <output name="outfile" file="sort_result2.bed"/> </test> <test> <param name="infile" value="sort_in2.bed"/> - <param name="column" value="5"/> - <param name="style" value="g"/> - <param name="order" value=""/> - <output name="out_file1" file="sort_out3.bed"/> + <repeat name="sortkeys"> + <param name="column" value="5"/> + <param name="style" value="g"/> + <param name="order" value=""/> + </repeat> + <output name="outfile" file="sort_result3.bed"/> </test> </tests> <help> - 
+<![CDATA[ **What it does** This tool sorts an input file. @@ -159,9 +170,7 @@ If you're planning to use the file with another tool that expected sorted files (such as *join*), you should use the **Alphabetical sort**, not the **Natural Sort**. Natural sort order is easier for humans, but is unnatural for computer programs. ------ - -*sort-header* is was written by A. Gordon ( gordon at cshl dot edu ) - +@REFERENCES@ +]]> </help> </tool>
--- a/sort_rows.xml Sun Oct 06 08:22:36 2013 -0400 +++ b/sort_rows.xml Wed Jan 07 11:10:52 2015 -0500 @@ -1,26 +1,37 @@ -<tool id="tp_sort_rows" name="Sort a row" version="0.0.1"> - <description>according to their columns</description> - <command>python -c 'for line in ["\t".join(sorted(line.strip().split("\t"))) for line in open("$input").readlines() ]: print line' > $outfile</command> - <inputs> - <param format="tabular" name="input" type="data" label="Tabular file that should be sorted"/> - </inputs> - <outputs> - <data format="tabular" name="outfile" metadata_source="input"/> - </outputs> - <options sanitize="False"/> - <tests> - - </tests> - <help> - +<tool id="tp_sort_rows" name="Sort a row" version="@BASE_VERSION@.0"> + <description>according to their columns</description> + <macros> + <import>macros.xml</import> + </macros> + <command> +<![CDATA[ + python -c 'for line in ( "\t".join(sorted(line.strip().split("\t"))) for line in open("$infile") ): print line' > $outfile +]]> + </command> + <inputs> + <param format="tabular" name="infile" type="data" label="Tabular file that should be sorted"/> + </inputs> + <outputs> + <data format="tabular" name="outfile" metadata_source="infile"/> + </outputs> + <options sanitize="False"/> + <tests> + <test> + <param name="infile" value="sort_rows1.tabular" ftype="tabular" /> + <output name="outfile" file="sort_rows_results1.bed"/> + </test> + </tests> + <help> +<![CDATA[ .. class:: infomark -**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert* +**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert* **What it does** That tool sorts each row in a TAB separated file, according to their columns. In other words: It is a sorted reordering of all columns. - -</help> +@REFERENCES@ +]]> + </help> </tool>
--- a/sorted_uniq.xml Sun Oct 06 08:22:36 2013 -0400 +++ b/sorted_uniq.xml Wed Jan 07 11:10:52 2015 -0500 @@ -1,53 +1,95 @@ -<tool id="tp_uniq_tool" name="Unique lines"> +<tool id="tp_uniq_tool" name="Unique lines" version="@BASE_VERSION@.0"> <description>assuming sorted input file</description> - <requirements> - <requirement type="package" version="8.21">gnu_coreutils</requirement> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"> <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement> - </requirements> + </expand> + <version_command>uniq --version | head -n 1</version_command> <command> - uniq - -f - $skipfields - $count - $repeated - $ignorecase - $uniqueonly - $input - - ## feature is not yet released, it will be in the next 8.22 version - ##--group=$group - - #if $count: - # count will print the count with spaces infrontof the line and - # with a space (not a tab) after the number, we need to cahnge that - | sed -e 's/ *//' -e 's/ /\t/' > $output +<![CDATA[ + uniq + #if $skipfields: + -f $skipfields + #end if + $ignorecase + + #if $grouping.grouping_select == 'yes': + --group=$grouping.group + #else: + $grouping.count + $grouping.repeated + $grouping.uniqueonly + #end if + + "$infile" + + #if $grouping.grouping_select == 'no' and $grouping.count: + ## count will print the count with spaces in front of the line and + ## with a space (not a tab) after the number, we need to change that + | sed -e 's/ *//' -e 's/ /\t/' #end if - > $output + > "$outfile" +]]> </command> - <inputs> - <param format="txt,tabular" name="input" type="data" label="File to scan for unique values" help="Make sure you have sorted this file" /> - - <param name="count" type="boolean" label="Counting number of occurrences [-c]" help="Prefix lines by the number of occurrences" truevalue="-c" falsevalue="" /> - <param name="repeated" type="boolean" label="Only print duplicate lines [-d]" truevalue="-d" falsevalue="" /> - <param
name="ignorecase" type="boolean" label="Ignore differences in case when comparing [-i]" truevalue="-i" falsevalue="" /> - <param name="uniqueonly" type="boolean" label="Only print unique lines [-u]" checked="True" truevalue="-u" falsevalue="" /> - <param name="skipfields" type="integer" label="Avoid comparing the first N fields [-f]" help="Use zero to start from the first field" size="2" value="0" /> + <param name="infile" format="tabular" type="data" + label="File to scan for unique values" help="Make sure you have sorted this file" /> - <!-- - <param name="group" type="select" label="Output all lines, and delimit each unique group."> - <option value="separate">Separate unique groups with a single delimiter</option> - <option value="prepend">Output a delimiter before each group of unique items</option> - <option value="append">Output a delimiter after each group of unique items.</option> - <option value="both">Output a delimiter around each group of unique items.</option> - </param> - --> + <conditional name="grouping"> + <param name="grouping_select" type="select" label="Do you want to group each unique group?"> + <option value="no">No</option> + <option value="yes">Yes</option> + </param> + <when value="no"> + <param name="count" type="boolean" truevalue="-c" falsevalue="" + label="Counting number of occurrences" help="Prefix lines by the number of occurrences. 
(-c)" /> + <param name="repeated" type="boolean" truevalue="-d" falsevalue="" + label="Only print duplicate lines" help="(-d)"/> + <param name="uniqueonly" type="boolean" checked="True" truevalue="-u" falsevalue="" + label="Only print unique lines" help="(-u)" /> + </when> + <when value="yes"> + <param name="group" type="select" optional="True" + label="Output all lines, and delimit each unique group" help="(--group)"> + <option value="">No grouping at all</option> + <option value="separate">Separate unique groups with a single delimiter</option> + <option value="prepend">Output a delimiter before each group of unique items</option> + <option value="append">Output a delimiter after each group of unique items</option> + <option value="both">Output a delimiter around each group of unique items</option> + </param> + </when> + </conditional> + + <param name="ignorecase" type="boolean" truevalue="-i" falsevalue="" + label="Ignore differences in case when comparing" help="(-i)"/> + <param name="skipfields" type="integer" size="2" value="0" + label="Avoid comparing the first N fields" help="Use zero to start from the first field. (-f)" /> + </inputs> - <outputs> - <data format="input" name="output" metadata_source="input"/> + <data format="input" name="outfile" metadata_source="infile"/> </outputs> + <tests> + <test> + <param name="infile" value="sorted_uniq1.tabular" /> + <param name="grouping_select" value="no"/> + <param name="count" value="True"/> + <param name="ignorecase" value="True"/> + <param name="uniqueonly" value="True"/> + <output name="outfile" file="sorted_uniq_results1.tabular" /> + </test> + <test> + <param name="infile" value="sorted_uniq1.tabular" /> + <param name="ignorecase" value="True"/> + <param name="grouping_select" value="yes"/> + <param name="group" value="separate"/> + <output name="outfile" file="sorted_uniq_results2.tabular" /> + </test> + </tests> <help> +<![CDATA[ This tool takes a sorted file and look for lines that are unique. .. 
class:: warningmark @@ -58,5 +100,7 @@ You can sort your file using either the "Sort" tool in "Filter and Sort", or the "Sort" tool in "Unix Tools". +@REFERENCES@ +]]> </help> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tac.xml Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,84 @@ +<tool id="tp_tac" name="tac" version="@BASE_VERSION@.0"> + <description>reverse a file (reverse cat)</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <version_command>tac --version | head -n 1</version_command> + <command> +<![CDATA[ + tac + #if str($separator.separator_select) == "yes": + $separator.before + $separator.regex + $separator.separator_string + #end if + "$input" + > "$outfile" +]]> + </command> + <inputs> + <param name="input" type="data" format="txt" label="Input file"/> + <conditional name="separator"> + <param name="separator_select" type="select" label="Do you want to use a separator other than newline?"> + <option value="yes">Yes</option> + <option value="no">No</option> + </param> + <when value="no" /> + <when value="yes"> + <param name="before" type="boolean" truevalue="-b" falsevalue="" checked="True" + label="Attach the separator before instead of after" help="(--before)"/> + <param name="regex" type="boolean" truevalue="-r" falsevalue="" checked="True" + label="Interpret the separator as a regular expression" help="(--regex)"/> + <param name="separator_string" size="5" type="text" value="" + label="Separator to use" help="(--separator)" /> + </when> + </conditional> + </inputs> + <outputs> + <data name="outfile" format="input" metadata_source="input"/> + </outputs> + <tests> + <test> + <param name="input" value="1.bed" ftype="txt"/> + <param name="before" value=""/> + <param name="regex" value=""/> + <output name="outfile" file="tac_result1.txt"/> + </test> + </tests> + <help> +<![CDATA[ +**What it does** + +tac is a Linux command that allows you to see a file line-by-line backwards. It is named by analogy with cat. 
+ +Mandatory arguments to long options are mandatory for short options too: + + -b, --before attach the separator before instead of after + -r, --regex interpret the separator as a regular expression + -s, --separator=STRING use STRING as the separator instead of newline + +----- + +**Example** + +Input file: + + 0 1 2 3 4 5 # 6 7 8 9 + + +default settings: + + 9 8 7 6 # 5 4 3 2 1 0 + +with option -s 5: + # 6 7 8 9 0 1 2 3 4 5 + +with option -b and -s 5: + + 5 # 6 7 8 9 0 1 2 3 4 + +@REFERENCES@ +]]> + </help> +</tool>
--- a/tail.xml Sun Oct 06 08:22:36 2013 -0400 +++ b/tail.xml Wed Jan 07 11:10:52 2015 -0500 @@ -1,29 +1,45 @@ -<tool id="tp_tail_tool" name="Select last" version="0.1.1"> +<tool id="tp_tail_tool" name="Select last" version="@BASE_VERSION@.0"> <description>lines from a dataset (tail)</description> - <requirements> - <requirement type="package" version="8.21">gnu_coreutils</requirement> - </requirements> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <version_command>tail --version | head -n 1</version_command> <command> - tail --lines $count '$input' > '$output' +<![CDATA[ + tail + --lines $complement$num_lines + '$infile' + > '$outfile' +]]> </command> - <inputs> - <param format="txt" name="input" type="data" label="file to cut" /> - <param name="count" type="integer" size="5" value="10" label="Output last X lines" help="" /> + <param name="infile" format="txt" type="data" label="Text file" /> + <param name="complement" type="select" label="Operation"> + <option value="">Keep last lines</option> + <option value="+">Keep everything from this line on</option> + </param> + <param name="num_lines" type="integer" size="5" value="10" + label="Number of lines" help="These will be kept (depending on 'operation'). 
(--lines)" /> </inputs> - <outputs> - <data format="input" name="output" metadata_source="input"/> + <data name="outfile" format="input" metadata_source="infile"/> </outputs> <tests> <test> - <param name="count" value="10"/> <param name="infile" value="1.bed"/> - <output name="out_file1" file="eq-showtail.dat"/> + <param name="num_lines" value="10"/> + <output name="outfile" file="tail_results1.bed"/> + </test> + <test> + <param name="infile" value="1.bed"/> + <param name="num_lines" value="10"/> + <param name="complement" value="+"/> + <output name="outfile" file="tail_results2.bed"/> </test> </tests> <help> - +<![CDATA[ **What it does** This tool outputs specified number of lines from the **end** of a dataset @@ -45,5 +61,7 @@ chr7 57341 57361 D17003_CTCF_R7 375 + chr7 57457 57477 D17003_CTCF_R3 188 + +@REFERENCES@ +]]> </help> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/easyjoin1.tabular Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,5 @@ +Fruit Color +Apple red +Banana yellow +Orange orange +Melon green
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/easyjoin2.tabular Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,5 @@ +Fruit Price +Orange 7 +Avocado 8 +Apple 4 +Banana 3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/easyjoin_result1.tabular Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,6 @@ +Fruit Color Price +Apple red 4 +Avocado 0 8 +Banana yellow 3 +Melon green 0 +Orange orange 7
--- a/test-data/eq-showbeginning.dat Sun Oct 06 08:22:36 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 - -chr1 147984545 147984630 CCDS990.1_cds_0_0_chr1_147984546_f 0 + -chr1 148078400 148078582 CCDS993.1_cds_0_0_chr1_148078401_r 0 - -chr1 148185136 148185276 CCDS996.1_cds_0_0_chr1_148185137_f 0 + -chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 - -chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 - -chr11 116206508 116206563 CCDS8377.1_cds_0_0_chr11_116206509_f 0 + -chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 - -chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + -chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 -
--- a/test-data/eq-showtail.dat Sun Oct 06 08:22:36 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - -chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - -chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + -chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - -chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + -chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - -chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + -chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - -chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + -chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/find_and_replace1.txt Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,5 @@ +I have a dream that one day this nation will rise up, and live out the true meaning of its creed: ‘We hold these truths to be self-evident: that all men are created equal.’ +I have a dream that one day on the red hills of Georgia the sons of former slaves and the sons of former slave owners will be able to sit down together at a table of brotherhood. +I have a dream that one day even the state of Mississippi, a state sweltering with the heat of injustice and sweltering with the heat of oppression, will be transformed into an oasis of freedom and justice. +I have a dream that my four little children will one day live in a nation where they will not be judged by the color of their skin but by the content of their character. +I have a dream today!
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/find_and_replace2.txt Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,3 @@ +1 foo chr1 gene +2 bar chr2 luther +3 baz chrMT gene1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/find_and_replace_results1.txt Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,5 @@ +I have a dream that one great day this nation will rise up, and live out the true meaning of its creed: ‘We hold these truths to be self-evident: that all men are created equal.’ +I have a dream that one great day on the red hills of Georgia the sons of former slaves and the sons of former slave owners will be able to sit down together at a table of brotherhood. +I have a dream that one great day even the state of Mississippi, a state sweltering with the heat of injustice and sweltering with the heat of oppression, will be transformed into an oasis of freedom and justice. +I have a dream that my four little children will one great day live in a nation where they will not be judged by the color of their skin but by the content of their character. +I have a dream today!
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/find_and_replace_results2.txt Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,3 @@ +1 foo 1 gene +2 bar 2 luther +3 baz MT gene1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/head_results1.bed Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,10 @@ +chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 - +chr1 147984545 147984630 CCDS990.1_cds_0_0_chr1_147984546_f 0 + +chr1 148078400 148078582 CCDS993.1_cds_0_0_chr1_148078401_r 0 - +chr1 148185136 148185276 CCDS996.1_cds_0_0_chr1_148185137_f 0 + +chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 - +chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 - +chr11 116206508 116206563 CCDS8377.1_cds_0_0_chr11_116206509_f 0 + +chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 - +chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/multijoin1.txt Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,9 @@ +chr4 888449 890171 FBtr0308778 0 + 266 1527 1722 +chr4 972167 979017 FBtr0310651 0 - 3944 6428 6850 +chr4 972186 979017 FBtr0089229 0 - 3944 6428 6831 +chr4 972186 979017 FBtr0089231 0 - 3944 6428 6831 +chr4 972186 979017 FBtr0089233 0 - 3944 6428 6831 +chr4 995793 996435 FBtr0111046 0 + 7 166 642 +chr4 995793 997931 FBtr0111044 0 + 28 683 2138 +chr4 995793 997931 FBtr0111045 0 + 28 683 2138 +chr4 1034029 1047719 FBtr0089223 0 - 5293 13394 13690
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/multijoin2.txt Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,9 @@ +chr4 90286 134453 FBtr0309803 0 + 657 29084 44167 +chr4 251355 266499 FBtr0089116 0 + 56 1296 15144 +chr4 252050 266506 FBtr0308086 0 + 56 1296 14456 +chr4 252050 266506 FBtr0308087 0 + 56 1296 14456 +chr4 252053 266528 FBtr0300796 0 + 56 1296 14475 +chr4 252053 266528 FBtr0300800 0 + 56 1296 14475 +chr4 252055 266528 FBtr0300798 0 + 56 1296 14473 +chr4 252055 266528 FBtr0300799 0 + 56 1296 14473 +chr4 252541 266528 FBtr0300797 0 + 56 1296 13987
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/multijoin3.txt Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,8 @@ +chr4 972167 979017 FBtr0310651 0 - 9927 6738 6850 +chr4 972186 979017 FBtr0089229 0 - 9927 6738 6831 +chr4 972186 979017 FBtr0089231 0 - 9927 6738 6831 +chr4 972186 979017 FBtr0089233 0 - 9927 6738 6831 +chr4 995793 996435 FBtr0111046 0 + 5 304 642 +chr4 995793 997931 FBtr0111044 0 + 17 714 2138 +chr4 995793 997931 FBtr0111045 0 + 17 714 2138 +chr4 1034029 1047719 FBtr0089223 0 - 17646 13536 13690
--- a/test-data/sort_out1.bed Sun Oct 06 08:22:36 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,32 +0,0 @@ -# comment 1 \n\n''" again -# comment 2 **}"''' special -# comment 3 @n/n""" characters -chr1 148185113 148187485 NM_002796 0 + 148185136 148187378 0 7 163,207,147,82,117,89,120, 0,416,877,1199,1674,1977,2252, -chr1 148077485 148111797 NM_002651 0 - 148078400 148111728 0 12 1097,121,133,266,124,105,110,228,228,45,937,77, 0,2081,2472,6871,9907,10257,11604,14199,15637,18274,23636,34235, -chr1 147984101 148035079 BC007833 0 + 147984545 148033414 0 14 529,32,81,131,118,153,300,206,84,49,85,130,46,1668, 0,25695,28767,33118,33695,33998,35644,38005,39629,40577,41402,43885,48367,49310, -chr1 147962006 147975713 NM_005997 0 - 147962192 147975670 0 6 574,145,177,115,153,160, 0,1543,7859,9048,9340,13547, -chr2 220229182 220233943 NM_024536 0 - 220229609 220233765 0 4 1687,180,574,492, 0,1990,2660,4269, -chr2 220108603 220116964 NM_001927 0 + 220108689 220116217 0 9 664,61,96,162,126,221,44,83,789, 0,1718,1874,2118,2451,2963,5400,7286,7572, -chr2 118389378 118390700 BC005078 0 - 118390395 118390500 0 1 1322, 0, -chr2 118288484 118306183 NM_006773 0 + 118288583 118304530 0 14 184,285,144,136,101,200,115,140,162,153,114,57,178,1796, 0,2765,4970,6482,6971,7183,7468,9890,10261,10768,11590,14270,14610,15903, -chr5 131621285 131637046 NM_003687 0 + 131621326 131635821 0 7 134,152,82,179,164,118,1430, 0,4915,8770,13221,13609,14097,14331, -chr5 131556201 131590458 NM_004199 0 - 131556601 131582218 0 15 471,97,69,66,54,100,71,177,194,240,138,152,97,100,170, 0,2316,2802,5596,6269,11138,11472,15098,16528,17674,21306,24587,25142,25935,34087, -chr5 131424245 131426795 NM_000588 0 + 131424298 131426383 0 5 215,42,90,42,535, 0,313,1658,1872,2015, -chr5 131170738 131357870 AF099740 0 - 131311206 131357817 0 31 112,124,120,81,65,40,120,129,61,88,94,79,72,102,144,117,89,73,96,135,135,78,74,52,33,179,100,102,65,115,248, 
0,11593,44117,47607,104668,109739,114675,126366,135488,137518,138009,140437,152389,153373,155388,159269,160793,162981,164403,165577,166119,167611,169501,178260,179675,180901,181658,182260,182953,183706,186884, -chr6 108722790 108950942 NM_145315 0 + 108722976 108950321 0 13 325,224,52,102,131,100,59,83,71,101,141,114,750, 0,28931,52094,60760,61796,71339,107102,152319,181970,182297,215317,224802,227402, -chr6 108639410 108689143 NM_152827 0 - 108640045 108688818 0 3 741,125,487, 0,2984,49246, -chr6 108593954 108616704 NM_003269 0 + 108594662 108615360 0 9 733,146,88,236,147,97,150,106,1507, 0,5400,8778,10445,12037,14265,14749,15488,21243, -chr6 108298214 108386086 NM_007214 0 - 108299600 108385906 0 21 1530,105,99,102,159,174,60,83,148,155,93,133,95,109,51,59,62,113,115,100,304, 0,2490,6246,10831,12670,23164,23520,27331,31052,32526,34311,36130,36365,38609,41028,42398,43048,51479,54500,59097,87568, -chr7 116944658 117107512 AF377960 0 - 116945541 116979926 0 23 1129,102,133,64,186,206,179,188,153,100,87,80,96,276,118,255,151,100,204,1654,225,108,173, 0,7364,8850,10413,13893,14398,17435,24259,24615,35177,35359,45901,47221,49781,56405,66857,69787,72208,73597,80474,100111,150555,162681, -chr7 116713967 116902666 NM_000492 0 + 116714099 116901113 0 27 185,111,109,216,90,164,126,247,93,183,192,95,87,724,129,38,251,80,151,228,101,249,156,90,173,106,1754, 0,24290,29071,50936,54313,55285,56585,60137,62053,68678,79501,107776,110390,111971,114967,122863,123569,126711,130556,131618,134650,147559,162475,172879,184725,185496,186945, -chr7 116511232 116557294 NM_003391 0 - 116512159 116556994 0 5 1157,265,278,227,383, 0,20384,37843,43339,45679, -chr7 113320332 113924911 AK131266 0 + 113862563 113893433 0 20 285,91,178,90,58,75,138,51,201,178,214,105,88,84,77,102,122,70,164,1124, 0,201692,340175,448290,451999,484480,542213,543265,543478,545201,556083,558358,565876,567599,573029,573245,575738,577123,577946,603455, -chr8 118880786 119193239 NM_000127 0 - 118881131 119192466 0 11 
531,172,161,90,96,119,133,120,108,94,1735, 0,5355,7850,13505,19068,20309,23098,30863,36077,37741,310718, -chr9 128849867 128870133 NM_020145 0 - 128850516 128869987 0 11 757,241,101,90,24,63,93,134,129,142,209, 0,1071,1736,2085,2635,4201,6376,6736,13056,14247,20057, -chr9 128789530 128848928 NM_015354 0 + 128789552 128848511 0 44 54,55,74,85,81,45,93,120,212,115,201,90,66,120,127,153,127,88,77,115,121,67,129,140,107,207,170,70,68,196,78,86,146,182,201,93,159,138,75,228,132,74,130,594, 0,1491,5075,8652,9254,10312,11104,11317,20808,21702,23060,25462,31564,32908,33566,34851,35204,35595,35776,37202,38860,39111,39891,40349,42422,45499,45827,46675,47158,47621,50453,50840,51474,51926,53831,54186,55119,55619,57449,57605,57947,58352,58541,58804, -chr9 128787362 128789566 NM_014908 0 - 128787519 128789136 0 1 2204, 0, -chr9 128763240 128783870 NM_174933 0 + 128764156 128783586 0 12 261,118,74,159,76,48,56,63,129,117,127,370, 0,522,875,5630,12374,12603,15040,15175,18961,19191,20037,20260, -chrX 152693677 152712545 NM_001666 0 - 152694029 152712503 0 22 586,100,93,184,74,234,106,135,78,61,103,28,85,192,102,222,129,183,63,163,205,109, 0,1693,2066,2364,2635,2794,3129,3323,3545,3752,5323,5647,5841,6032,6401,11455,11778,13249,13719,13987,14227,18759, -chrX 152691216 152693487 NM_000054 0 + 152691446 152693029 0 3 255,885,664, 0,616,1607, -chrX 152648233 152662158 NM_000425 0 - 152648964 152662138 0 28 963,12,73,135,156,120,174,123,202,116,223,71,198,111,125,157,167,112,144,132,185,112,171,123,203,106,11,100, 0,1436,1545,1951,2390,2653,2889,3156,3367,3772,4717,5122,5424,5868,6066,6370,6629,6909,7588,7871,8124,8456,8858,9125,10220,10660,11296,13825, -chrX 122719582 122773357 NM_001167 0 + 122745047 122766566 0 7 96,909,100,79,43,201,6985, 0,25433,28421,31040,32533,40295,46790,
--- a/test-data/sort_out2.bed Sun Oct 06 08:22:36 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,32 +0,0 @@ -# comment 1 \n\n''" again -# comment 2 **}"''' special -# comment 3 @n/n""" characters -chr1 147962006 147975713 NM_005997 0 - 147962192 147975670 0 6 574,145,177,115,153,160, 0,1543,7859,9048,9340,13547, -chr1 147984101 148035079 BC007833 0 + 147984545 148033414 0 14 529,32,81,131,118,153,300,206,84,49,85,130,46,1668, 0,25695,28767,33118,33695,33998,35644,38005,39629,40577,41402,43885,48367,49310, -chr1 148077485 148111797 NM_002651 0 - 148078400 148111728 0 12 1097,121,133,266,124,105,110,228,228,45,937,77, 0,2081,2472,6871,9907,10257,11604,14199,15637,18274,23636,34235, -chr1 148185113 148187485 NM_002796 0 + 148185136 148187378 0 7 163,207,147,82,117,89,120, 0,416,877,1199,1674,1977,2252, -chr2 118288484 118306183 NM_006773 0 + 118288583 118304530 0 14 184,285,144,136,101,200,115,140,162,153,114,57,178,1796, 0,2765,4970,6482,6971,7183,7468,9890,10261,10768,11590,14270,14610,15903, -chr2 118389378 118390700 BC005078 0 - 118390395 118390500 0 1 1322, 0, -chr2 220108603 220116964 NM_001927 0 + 220108689 220116217 0 9 664,61,96,162,126,221,44,83,789, 0,1718,1874,2118,2451,2963,5400,7286,7572, -chr2 220229182 220233943 NM_024536 0 - 220229609 220233765 0 4 1687,180,574,492, 0,1990,2660,4269, -chr5 131170738 131357870 AF099740 0 - 131311206 131357817 0 31 112,124,120,81,65,40,120,129,61,88,94,79,72,102,144,117,89,73,96,135,135,78,74,52,33,179,100,102,65,115,248, 0,11593,44117,47607,104668,109739,114675,126366,135488,137518,138009,140437,152389,153373,155388,159269,160793,162981,164403,165577,166119,167611,169501,178260,179675,180901,181658,182260,182953,183706,186884, -chr5 131424245 131426795 NM_000588 0 + 131424298 131426383 0 5 215,42,90,42,535, 0,313,1658,1872,2015, -chr5 131556201 131590458 NM_004199 0 - 131556601 131582218 0 15 471,97,69,66,54,100,71,177,194,240,138,152,97,100,170, 
0,2316,2802,5596,6269,11138,11472,15098,16528,17674,21306,24587,25142,25935,34087, -chr5 131621285 131637046 NM_003687 0 + 131621326 131635821 0 7 134,152,82,179,164,118,1430, 0,4915,8770,13221,13609,14097,14331, -chr6 108298214 108386086 NM_007214 0 - 108299600 108385906 0 21 1530,105,99,102,159,174,60,83,148,155,93,133,95,109,51,59,62,113,115,100,304, 0,2490,6246,10831,12670,23164,23520,27331,31052,32526,34311,36130,36365,38609,41028,42398,43048,51479,54500,59097,87568, -chr6 108593954 108616704 NM_003269 0 + 108594662 108615360 0 9 733,146,88,236,147,97,150,106,1507, 0,5400,8778,10445,12037,14265,14749,15488,21243, -chr6 108639410 108689143 NM_152827 0 - 108640045 108688818 0 3 741,125,487, 0,2984,49246, -chr6 108722790 108950942 NM_145315 0 + 108722976 108950321 0 13 325,224,52,102,131,100,59,83,71,101,141,114,750, 0,28931,52094,60760,61796,71339,107102,152319,181970,182297,215317,224802,227402, -chr7 113320332 113924911 AK131266 0 + 113862563 113893433 0 20 285,91,178,90,58,75,138,51,201,178,214,105,88,84,77,102,122,70,164,1124, 0,201692,340175,448290,451999,484480,542213,543265,543478,545201,556083,558358,565876,567599,573029,573245,575738,577123,577946,603455, -chr7 116511232 116557294 NM_003391 0 - 116512159 116556994 0 5 1157,265,278,227,383, 0,20384,37843,43339,45679, -chr7 116713967 116902666 NM_000492 0 + 116714099 116901113 0 27 185,111,109,216,90,164,126,247,93,183,192,95,87,724,129,38,251,80,151,228,101,249,156,90,173,106,1754, 0,24290,29071,50936,54313,55285,56585,60137,62053,68678,79501,107776,110390,111971,114967,122863,123569,126711,130556,131618,134650,147559,162475,172879,184725,185496,186945, -chr7 116944658 117107512 AF377960 0 - 116945541 116979926 0 23 1129,102,133,64,186,206,179,188,153,100,87,80,96,276,118,255,151,100,204,1654,225,108,173, 0,7364,8850,10413,13893,14398,17435,24259,24615,35177,35359,45901,47221,49781,56405,66857,69787,72208,73597,80474,100111,150555,162681, -chr8 118880786 119193239 NM_000127 0 - 118881131 119192466 0 11 
531,172,161,90,96,119,133,120,108,94,1735, 0,5355,7850,13505,19068,20309,23098,30863,36077,37741,310718, -chr9 128763240 128783870 NM_174933 0 + 128764156 128783586 0 12 261,118,74,159,76,48,56,63,129,117,127,370, 0,522,875,5630,12374,12603,15040,15175,18961,19191,20037,20260, -chr9 128787362 128789566 NM_014908 0 - 128787519 128789136 0 1 2204, 0, -chr9 128789530 128848928 NM_015354 0 + 128789552 128848511 0 44 54,55,74,85,81,45,93,120,212,115,201,90,66,120,127,153,127,88,77,115,121,67,129,140,107,207,170,70,68,196,78,86,146,182,201,93,159,138,75,228,132,74,130,594, 0,1491,5075,8652,9254,10312,11104,11317,20808,21702,23060,25462,31564,32908,33566,34851,35204,35595,35776,37202,38860,39111,39891,40349,42422,45499,45827,46675,47158,47621,50453,50840,51474,51926,53831,54186,55119,55619,57449,57605,57947,58352,58541,58804, -chr9 128849867 128870133 NM_020145 0 - 128850516 128869987 0 11 757,241,101,90,24,63,93,134,129,142,209, 0,1071,1736,2085,2635,4201,6376,6736,13056,14247,20057, -chrX 122719582 122773357 NM_001167 0 + 122745047 122766566 0 7 96,909,100,79,43,201,6985, 0,25433,28421,31040,32533,40295,46790, -chrX 152648233 152662158 NM_000425 0 - 152648964 152662138 0 28 963,12,73,135,156,120,174,123,202,116,223,71,198,111,125,157,167,112,144,132,185,112,171,123,203,106,11,100, 0,1436,1545,1951,2390,2653,2889,3156,3367,3772,4717,5122,5424,5868,6066,6370,6629,6909,7588,7871,8124,8456,8858,9125,10220,10660,11296,13825, -chrX 152691216 152693487 NM_000054 0 + 152691446 152693029 0 3 255,885,664, 0,616,1607, -chrX 152693677 152712545 NM_001666 0 - 152694029 152712503 0 22 586,100,93,184,74,234,106,135,78,61,103,28,85,192,102,222,129,183,63,163,205,109, 0,1693,2066,2364,2635,2794,3129,3323,3545,3752,5323,5647,5841,6032,6401,11455,11778,13249,13719,13987,14227,18759,
--- a/test-data/sort_out3.bed Sun Oct 06 08:22:36 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -chr1 300 400 feature4 1.1e-05 + -chr20 800 900 feature2 1.1 + -chr10 100 200 feature1 100.01 + -chr21 300 500 feature5 1.1e2 + -chr2 500 600 feature3 1000.1 + -chr15 700 800 feature6 1.1e4 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_result1.bed Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,32 @@ +# comment 1 \n\n''" again +# comment 2 **}"''' special +# comment 3 @n/n""" characters +chr1 148185113 148187485 NM_002796 0 + 148185136 148187378 0 7 163,207,147,82,117,89,120, 0,416,877,1199,1674,1977,2252, +chr1 148077485 148111797 NM_002651 0 - 148078400 148111728 0 12 1097,121,133,266,124,105,110,228,228,45,937,77, 0,2081,2472,6871,9907,10257,11604,14199,15637,18274,23636,34235, +chr1 147984101 148035079 BC007833 0 + 147984545 148033414 0 14 529,32,81,131,118,153,300,206,84,49,85,130,46,1668, 0,25695,28767,33118,33695,33998,35644,38005,39629,40577,41402,43885,48367,49310, +chr1 147962006 147975713 NM_005997 0 - 147962192 147975670 0 6 574,145,177,115,153,160, 0,1543,7859,9048,9340,13547, +chr2 220229182 220233943 NM_024536 0 - 220229609 220233765 0 4 1687,180,574,492, 0,1990,2660,4269, +chr2 220108603 220116964 NM_001927 0 + 220108689 220116217 0 9 664,61,96,162,126,221,44,83,789, 0,1718,1874,2118,2451,2963,5400,7286,7572, +chr2 118389378 118390700 BC005078 0 - 118390395 118390500 0 1 1322, 0, +chr2 118288484 118306183 NM_006773 0 + 118288583 118304530 0 14 184,285,144,136,101,200,115,140,162,153,114,57,178,1796, 0,2765,4970,6482,6971,7183,7468,9890,10261,10768,11590,14270,14610,15903, +chr5 131621285 131637046 NM_003687 0 + 131621326 131635821 0 7 134,152,82,179,164,118,1430, 0,4915,8770,13221,13609,14097,14331, +chr5 131556201 131590458 NM_004199 0 - 131556601 131582218 0 15 471,97,69,66,54,100,71,177,194,240,138,152,97,100,170, 0,2316,2802,5596,6269,11138,11472,15098,16528,17674,21306,24587,25142,25935,34087, +chr5 131424245 131426795 NM_000588 0 + 131424298 131426383 0 5 215,42,90,42,535, 0,313,1658,1872,2015, +chr5 131170738 131357870 AF099740 0 - 131311206 131357817 0 31 112,124,120,81,65,40,120,129,61,88,94,79,72,102,144,117,89,73,96,135,135,78,74,52,33,179,100,102,65,115,248, 
0,11593,44117,47607,104668,109739,114675,126366,135488,137518,138009,140437,152389,153373,155388,159269,160793,162981,164403,165577,166119,167611,169501,178260,179675,180901,181658,182260,182953,183706,186884, +chr6 108722790 108950942 NM_145315 0 + 108722976 108950321 0 13 325,224,52,102,131,100,59,83,71,101,141,114,750, 0,28931,52094,60760,61796,71339,107102,152319,181970,182297,215317,224802,227402, +chr6 108639410 108689143 NM_152827 0 - 108640045 108688818 0 3 741,125,487, 0,2984,49246, +chr6 108593954 108616704 NM_003269 0 + 108594662 108615360 0 9 733,146,88,236,147,97,150,106,1507, 0,5400,8778,10445,12037,14265,14749,15488,21243, +chr6 108298214 108386086 NM_007214 0 - 108299600 108385906 0 21 1530,105,99,102,159,174,60,83,148,155,93,133,95,109,51,59,62,113,115,100,304, 0,2490,6246,10831,12670,23164,23520,27331,31052,32526,34311,36130,36365,38609,41028,42398,43048,51479,54500,59097,87568, +chr7 116944658 117107512 AF377960 0 - 116945541 116979926 0 23 1129,102,133,64,186,206,179,188,153,100,87,80,96,276,118,255,151,100,204,1654,225,108,173, 0,7364,8850,10413,13893,14398,17435,24259,24615,35177,35359,45901,47221,49781,56405,66857,69787,72208,73597,80474,100111,150555,162681, +chr7 116713967 116902666 NM_000492 0 + 116714099 116901113 0 27 185,111,109,216,90,164,126,247,93,183,192,95,87,724,129,38,251,80,151,228,101,249,156,90,173,106,1754, 0,24290,29071,50936,54313,55285,56585,60137,62053,68678,79501,107776,110390,111971,114967,122863,123569,126711,130556,131618,134650,147559,162475,172879,184725,185496,186945, +chr7 116511232 116557294 NM_003391 0 - 116512159 116556994 0 5 1157,265,278,227,383, 0,20384,37843,43339,45679, +chr7 113320332 113924911 AK131266 0 + 113862563 113893433 0 20 285,91,178,90,58,75,138,51,201,178,214,105,88,84,77,102,122,70,164,1124, 0,201692,340175,448290,451999,484480,542213,543265,543478,545201,556083,558358,565876,567599,573029,573245,575738,577123,577946,603455, +chr8 118880786 119193239 NM_000127 0 - 118881131 119192466 0 11 
531,172,161,90,96,119,133,120,108,94,1735, 0,5355,7850,13505,19068,20309,23098,30863,36077,37741,310718, +chr9 128849867 128870133 NM_020145 0 - 128850516 128869987 0 11 757,241,101,90,24,63,93,134,129,142,209, 0,1071,1736,2085,2635,4201,6376,6736,13056,14247,20057, +chr9 128789530 128848928 NM_015354 0 + 128789552 128848511 0 44 54,55,74,85,81,45,93,120,212,115,201,90,66,120,127,153,127,88,77,115,121,67,129,140,107,207,170,70,68,196,78,86,146,182,201,93,159,138,75,228,132,74,130,594, 0,1491,5075,8652,9254,10312,11104,11317,20808,21702,23060,25462,31564,32908,33566,34851,35204,35595,35776,37202,38860,39111,39891,40349,42422,45499,45827,46675,47158,47621,50453,50840,51474,51926,53831,54186,55119,55619,57449,57605,57947,58352,58541,58804, +chr9 128787362 128789566 NM_014908 0 - 128787519 128789136 0 1 2204, 0, +chr9 128763240 128783870 NM_174933 0 + 128764156 128783586 0 12 261,118,74,159,76,48,56,63,129,117,127,370, 0,522,875,5630,12374,12603,15040,15175,18961,19191,20037,20260, +chrX 152693677 152712545 NM_001666 0 - 152694029 152712503 0 22 586,100,93,184,74,234,106,135,78,61,103,28,85,192,102,222,129,183,63,163,205,109, 0,1693,2066,2364,2635,2794,3129,3323,3545,3752,5323,5647,5841,6032,6401,11455,11778,13249,13719,13987,14227,18759, +chrX 152691216 152693487 NM_000054 0 + 152691446 152693029 0 3 255,885,664, 0,616,1607, +chrX 152648233 152662158 NM_000425 0 - 152648964 152662138 0 28 963,12,73,135,156,120,174,123,202,116,223,71,198,111,125,157,167,112,144,132,185,112,171,123,203,106,11,100, 0,1436,1545,1951,2390,2653,2889,3156,3367,3772,4717,5122,5424,5868,6066,6370,6629,6909,7588,7871,8124,8456,8858,9125,10220,10660,11296,13825, +chrX 122719582 122773357 NM_001167 0 + 122745047 122766566 0 7 96,909,100,79,43,201,6985, 0,25433,28421,31040,32533,40295,46790,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_result2.bed Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,32 @@ +# comment 1 \n\n''" again +# comment 2 **}"''' special +# comment 3 @n/n""" characters +chr1 147962006 147975713 NM_005997 0 - 147962192 147975670 0 6 574,145,177,115,153,160, 0,1543,7859,9048,9340,13547, +chr1 147984101 148035079 BC007833 0 + 147984545 148033414 0 14 529,32,81,131,118,153,300,206,84,49,85,130,46,1668, 0,25695,28767,33118,33695,33998,35644,38005,39629,40577,41402,43885,48367,49310, +chr1 148077485 148111797 NM_002651 0 - 148078400 148111728 0 12 1097,121,133,266,124,105,110,228,228,45,937,77, 0,2081,2472,6871,9907,10257,11604,14199,15637,18274,23636,34235, +chr1 148185113 148187485 NM_002796 0 + 148185136 148187378 0 7 163,207,147,82,117,89,120, 0,416,877,1199,1674,1977,2252, +chr2 118288484 118306183 NM_006773 0 + 118288583 118304530 0 14 184,285,144,136,101,200,115,140,162,153,114,57,178,1796, 0,2765,4970,6482,6971,7183,7468,9890,10261,10768,11590,14270,14610,15903, +chr2 118389378 118390700 BC005078 0 - 118390395 118390500 0 1 1322, 0, +chr2 220108603 220116964 NM_001927 0 + 220108689 220116217 0 9 664,61,96,162,126,221,44,83,789, 0,1718,1874,2118,2451,2963,5400,7286,7572, +chr2 220229182 220233943 NM_024536 0 - 220229609 220233765 0 4 1687,180,574,492, 0,1990,2660,4269, +chr5 131170738 131357870 AF099740 0 - 131311206 131357817 0 31 112,124,120,81,65,40,120,129,61,88,94,79,72,102,144,117,89,73,96,135,135,78,74,52,33,179,100,102,65,115,248, 0,11593,44117,47607,104668,109739,114675,126366,135488,137518,138009,140437,152389,153373,155388,159269,160793,162981,164403,165577,166119,167611,169501,178260,179675,180901,181658,182260,182953,183706,186884, +chr5 131424245 131426795 NM_000588 0 + 131424298 131426383 0 5 215,42,90,42,535, 0,313,1658,1872,2015, +chr5 131556201 131590458 NM_004199 0 - 131556601 131582218 0 15 471,97,69,66,54,100,71,177,194,240,138,152,97,100,170, 
0,2316,2802,5596,6269,11138,11472,15098,16528,17674,21306,24587,25142,25935,34087, +chr5 131621285 131637046 NM_003687 0 + 131621326 131635821 0 7 134,152,82,179,164,118,1430, 0,4915,8770,13221,13609,14097,14331, +chr6 108298214 108386086 NM_007214 0 - 108299600 108385906 0 21 1530,105,99,102,159,174,60,83,148,155,93,133,95,109,51,59,62,113,115,100,304, 0,2490,6246,10831,12670,23164,23520,27331,31052,32526,34311,36130,36365,38609,41028,42398,43048,51479,54500,59097,87568, +chr6 108593954 108616704 NM_003269 0 + 108594662 108615360 0 9 733,146,88,236,147,97,150,106,1507, 0,5400,8778,10445,12037,14265,14749,15488,21243, +chr6 108639410 108689143 NM_152827 0 - 108640045 108688818 0 3 741,125,487, 0,2984,49246, +chr6 108722790 108950942 NM_145315 0 + 108722976 108950321 0 13 325,224,52,102,131,100,59,83,71,101,141,114,750, 0,28931,52094,60760,61796,71339,107102,152319,181970,182297,215317,224802,227402, +chr7 113320332 113924911 AK131266 0 + 113862563 113893433 0 20 285,91,178,90,58,75,138,51,201,178,214,105,88,84,77,102,122,70,164,1124, 0,201692,340175,448290,451999,484480,542213,543265,543478,545201,556083,558358,565876,567599,573029,573245,575738,577123,577946,603455, +chr7 116511232 116557294 NM_003391 0 - 116512159 116556994 0 5 1157,265,278,227,383, 0,20384,37843,43339,45679, +chr7 116713967 116902666 NM_000492 0 + 116714099 116901113 0 27 185,111,109,216,90,164,126,247,93,183,192,95,87,724,129,38,251,80,151,228,101,249,156,90,173,106,1754, 0,24290,29071,50936,54313,55285,56585,60137,62053,68678,79501,107776,110390,111971,114967,122863,123569,126711,130556,131618,134650,147559,162475,172879,184725,185496,186945, +chr7 116944658 117107512 AF377960 0 - 116945541 116979926 0 23 1129,102,133,64,186,206,179,188,153,100,87,80,96,276,118,255,151,100,204,1654,225,108,173, 0,7364,8850,10413,13893,14398,17435,24259,24615,35177,35359,45901,47221,49781,56405,66857,69787,72208,73597,80474,100111,150555,162681, +chr8 118880786 119193239 NM_000127 0 - 118881131 119192466 0 11 
531,172,161,90,96,119,133,120,108,94,1735, 0,5355,7850,13505,19068,20309,23098,30863,36077,37741,310718, +chr9 128763240 128783870 NM_174933 0 + 128764156 128783586 0 12 261,118,74,159,76,48,56,63,129,117,127,370, 0,522,875,5630,12374,12603,15040,15175,18961,19191,20037,20260, +chr9 128787362 128789566 NM_014908 0 - 128787519 128789136 0 1 2204, 0, +chr9 128789530 128848928 NM_015354 0 + 128789552 128848511 0 44 54,55,74,85,81,45,93,120,212,115,201,90,66,120,127,153,127,88,77,115,121,67,129,140,107,207,170,70,68,196,78,86,146,182,201,93,159,138,75,228,132,74,130,594, 0,1491,5075,8652,9254,10312,11104,11317,20808,21702,23060,25462,31564,32908,33566,34851,35204,35595,35776,37202,38860,39111,39891,40349,42422,45499,45827,46675,47158,47621,50453,50840,51474,51926,53831,54186,55119,55619,57449,57605,57947,58352,58541,58804, +chr9 128849867 128870133 NM_020145 0 - 128850516 128869987 0 11 757,241,101,90,24,63,93,134,129,142,209, 0,1071,1736,2085,2635,4201,6376,6736,13056,14247,20057, +chrX 122719582 122773357 NM_001167 0 + 122745047 122766566 0 7 96,909,100,79,43,201,6985, 0,25433,28421,31040,32533,40295,46790, +chrX 152648233 152662158 NM_000425 0 - 152648964 152662138 0 28 963,12,73,135,156,120,174,123,202,116,223,71,198,111,125,157,167,112,144,132,185,112,171,123,203,106,11,100, 0,1436,1545,1951,2390,2653,2889,3156,3367,3772,4717,5122,5424,5868,6066,6370,6629,6909,7588,7871,8124,8456,8858,9125,10220,10660,11296,13825, +chrX 152691216 152693487 NM_000054 0 + 152691446 152693029 0 3 255,885,664, 0,616,1607, +chrX 152693677 152712545 NM_001666 0 - 152694029 152712503 0 22 586,100,93,184,74,234,106,135,78,61,103,28,85,192,102,222,129,183,63,163,205,109, 0,1693,2066,2364,2635,2794,3129,3323,3545,3752,5323,5647,5841,6032,6401,11455,11778,13249,13719,13987,14227,18759,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_result3.bed Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,6 @@ +chr20 800 900 feature2 1.1 + +chr1 300 400 feature4 1.1e-05 + +chr21 300 500 feature5 1.1e2 + +chr15 700 800 feature6 1.1e4 + +chr10 100 200 feature1 100.01 + +chr2 500 600 feature3 1000.1 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_rows1.tabular Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,4 @@ +a b c d e f +f e d c b a +7 6 5 4 3 2 +1 2 3 4 5 6
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_rows_results1.bed Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,4 @@ +a b c d e f +a b c d e f +2 3 4 5 6 7 +1 2 3 4 5 6
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sorted_uniq1.tabular Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,8 @@ +a +A +b +b +B +c +d +e
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sorted_uniq_results1.tabular Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,3 @@ +1 c +1 d +1 e
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sorted_uniq_results2.tabular Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,12 @@ +a +A + +b +b +B + +c + +d + +e
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tail_results1.bed Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,10 @@ +chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - +chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - +chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + +chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - +chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + +chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - +chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + +chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - +chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + +chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tail_results2.bed Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,56 @@ +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - +chr13 112381694 112381953 CCDS9526.1_cds_0_0_chr13_112381695_f 0 + +chr14 98710240 98712285 CCDS9949.1_cds_0_0_chr14_98710241_r 0 - +chr15 41486872 41487060 CCDS10096.1_cds_0_0_chr15_41486873_r 0 - +chr15 41673708 41673857 CCDS10097.1_cds_0_0_chr15_41673709_f 0 + +chr15 41679161 41679250 CCDS10098.1_cds_0_0_chr15_41679162_r 0 - +chr15 41826029 41826196 CCDS10101.1_cds_0_0_chr15_41826030_f 0 + +chr16 142908 143003 CCDS10397.1_cds_0_0_chr16_142909_f 0 + +chr16 179963 180135 CCDS10401.1_cds_0_0_chr16_179964_r 0 - +chr16 244413 244681 CCDS10402.1_cds_0_0_chr16_244414_f 0 + +chr16 259268 259383 CCDS10403.1_cds_0_0_chr16_259269_r 0 - +chr18 23786114 23786321 CCDS11891.1_cds_0_0_chr18_23786115_r 0 - +chr18 59406881 59407046 CCDS11985.1_cds_0_0_chr18_59406882_f 0 + +chr18 59455932 59456337 CCDS11986.1_cds_0_0_chr18_59455933_r 0 - +chr18 59600586 59600754 CCDS11988.1_cds_0_0_chr18_59600587_f 0 + +chr19 59068595 59069564 CCDS12866.1_cds_0_0_chr19_59068596_f 0 + +chr19 59236026 59236146 CCDS12872.1_cds_0_0_chr19_59236027_r 0 - +chr19 59297998 59298008 CCDS12877.1_cds_0_0_chr19_59297999_f 0 + +chr19 59302168 59302288 CCDS12878.1_cds_0_0_chr19_59302169_r 0 - +chr2 118288583 118288668 CCDS2120.1_cds_0_0_chr2_118288584_f 0 + +chr2 118394148 118394202 CCDS2121.1_cds_0_0_chr2_118394149_r 0 - +chr2 220190202 220190242 CCDS2441.1_cds_0_0_chr2_220190203_f 0 + +chr2 220229609 220230869 CCDS2443.1_cds_0_0_chr2_220229610_r 0 - +chr20 33330413 33330423 CCDS13249.1_cds_0_0_chr20_33330414_r 0 - +chr20 33513606 33513792 CCDS13255.1_cds_0_0_chr20_33513607_f 0 + +chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 - +chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 + +chr21 32707032 32707192 CCDS13614.1_cds_0_0_chr21_32707033_f 0 + +chr21 32869641 32870022 
CCDS13615.1_cds_0_0_chr21_32869642_r 0 - +chr21 33321040 33322012 CCDS13620.1_cds_0_0_chr21_33321041_f 0 + +chr21 33744994 33745040 CCDS13625.1_cds_0_0_chr21_33744995_r 0 - +chr22 30120223 30120265 CCDS13897.1_cds_0_0_chr22_30120224_f 0 + +chr22 30160419 30160661 CCDS13898.1_cds_0_0_chr22_30160420_r 0 - +chr22 30665273 30665360 CCDS13901.1_cds_0_0_chr22_30665274_f 0 + +chr22 30939054 30939266 CCDS13903.1_cds_0_0_chr22_30939055_r 0 - +chr5 131424298 131424460 CCDS4149.1_cds_0_0_chr5_131424299_f 0 + +chr5 131556601 131556672 CCDS4151.1_cds_0_0_chr5_131556602_r 0 - +chr5 131621326 131621419 CCDS4152.1_cds_0_0_chr5_131621327_f 0 + +chr5 131847541 131847666 CCDS4155.1_cds_0_0_chr5_131847542_r 0 - +chr6 108299600 108299744 CCDS5061.1_cds_0_0_chr6_108299601_r 0 - +chr6 108594662 108594687 CCDS5063.1_cds_0_0_chr6_108594663_f 0 + +chr6 108640045 108640151 CCDS5064.1_cds_0_0_chr6_108640046_r 0 - +chr6 108722976 108723115 CCDS5067.1_cds_0_0_chr6_108722977_f 0 + +chr7 113660517 113660685 CCDS5760.1_cds_0_0_chr7_113660518_f 0 + +chr7 116512159 116512389 CCDS5771.1_cds_0_0_chr7_116512160_r 0 - +chr7 116714099 116714152 CCDS5773.1_cds_0_0_chr7_116714100_f 0 + +chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - +chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - +chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + +chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - +chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + +chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - +chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + +chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - +chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + +chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unfold_column1.tabular Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,1 @@ +a b 1 2 3 4 5 c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unfold_column_result1.tabular Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,5 @@ +a b 1 c +a b 2 c +a b 3 c +a b 4 c +a b 5 c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unique_results1.bed Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,65 @@ +chr16 142908 143003 CCDS10397.1_cds_0_0_chr16_142909_f 0 + +chr16 179963 180135 CCDS10401.1_cds_0_0_chr16_179964_r 0 - +chr16 244413 244681 CCDS10402.1_cds_0_0_chr16_244414_f 0 + +chr16 259268 259383 CCDS10403.1_cds_0_0_chr16_259269_r 0 - +chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + +chr18 23786114 23786321 CCDS11891.1_cds_0_0_chr18_23786115_r 0 - +chr22 30120223 30120265 CCDS13897.1_cds_0_0_chr22_30120224_f 0 + +chr22 30160419 30160661 CCDS13898.1_cds_0_0_chr22_30160420_r 0 - +chr22 30665273 30665360 CCDS13901.1_cds_0_0_chr22_30665274_f 0 + +chr22 30939054 30939266 CCDS13903.1_cds_0_0_chr22_30939055_r 0 - +chr21 32707032 32707192 CCDS13614.1_cds_0_0_chr21_32707033_f 0 + +chr21 32869641 32870022 CCDS13615.1_cds_0_0_chr21_32869642_r 0 - +chr21 33321040 33322012 CCDS13620.1_cds_0_0_chr21_33321041_f 0 + +chr20 33330413 33330423 CCDS13249.1_cds_0_0_chr20_33330414_r 0 - +chr20 33513606 33513792 CCDS13255.1_cds_0_0_chr20_33513607_f 0 + +chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 - +chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 + +chr21 33744994 33745040 CCDS13625.1_cds_0_0_chr21_33744995_r 0 - +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - +chr15 41486872 41487060 CCDS10096.1_cds_0_0_chr15_41486873_r 0 - +chr15 41673708 41673857 CCDS10097.1_cds_0_0_chr15_41673709_f 0 + +chr15 41679161 41679250 CCDS10098.1_cds_0_0_chr15_41679162_r 0 - +chr15 41826029 41826196 CCDS10101.1_cds_0_0_chr15_41826030_f 0 + +chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 - +chr19 59068595 59069564 CCDS12866.1_cds_0_0_chr19_59068596_f 0 + +chr19 59236026 59236146 CCDS12872.1_cds_0_0_chr19_59236027_r 0 - +chr19 59297998 59298008 CCDS12877.1_cds_0_0_chr19_59297999_f 0 + +chr19 59302168 59302288 CCDS12878.1_cds_0_0_chr19_59302169_r 0 - +chr18 59406881 59407046 
CCDS11985.1_cds_0_0_chr18_59406882_f 0 + +chr18 59455932 59456337 CCDS11986.1_cds_0_0_chr18_59455933_r 0 - +chr18 59600586 59600754 CCDS11988.1_cds_0_0_chr18_59600587_f 0 + +chr14 98710240 98712285 CCDS9949.1_cds_0_0_chr14_98710241_r 0 - +chr6 108299600 108299744 CCDS5061.1_cds_0_0_chr6_108299601_r 0 - +chr6 108594662 108594687 CCDS5063.1_cds_0_0_chr6_108594663_f 0 + +chr6 108640045 108640151 CCDS5064.1_cds_0_0_chr6_108640046_r 0 - +chr6 108722976 108723115 CCDS5067.1_cds_0_0_chr6_108722977_f 0 + +chr13 112381694 112381953 CCDS9526.1_cds_0_0_chr13_112381695_f 0 + +chr7 113660517 113660685 CCDS5760.1_cds_0_0_chr7_113660518_f 0 + +chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 - +chr11 116206508 116206563 CCDS8377.1_cds_0_0_chr11_116206509_f 0 + +chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 - +chr7 116512159 116512389 CCDS5771.1_cds_0_0_chr7_116512160_r 0 - +chr7 116714099 116714152 CCDS5773.1_cds_0_0_chr7_116714100_f 0 + +chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - +chr2 118288583 118288668 CCDS2120.1_cds_0_0_chr2_118288584_f 0 + +chr2 118394148 118394202 CCDS2121.1_cds_0_0_chr2_118394149_r 0 - +chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - +chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + +chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + +chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - +chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + +chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - +chr5 131424298 131424460 CCDS4149.1_cds_0_0_chr5_131424299_f 0 + +chr5 131556601 131556672 CCDS4151.1_cds_0_0_chr5_131556602_r 0 - +chr5 131621326 131621419 CCDS4152.1_cds_0_0_chr5_131621327_f 0 + +chr5 131847541 131847666 CCDS4155.1_cds_0_0_chr5_131847542_r 0 - +chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 - +chr1 147984545 147984630 CCDS990.1_cds_0_0_chr1_147984546_f 0 + +chr1 148078400 148078582 
CCDS993.1_cds_0_0_chr1_148078401_r 0 - +chr1 148185136 148185276 CCDS996.1_cds_0_0_chr1_148185137_f 0 + +chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - +chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + +chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 - +chr2 220190202 220190242 CCDS2441.1_cds_0_0_chr2_220190203_f 0 + +chr2 220229609 220230869 CCDS2443.1_cds_0_0_chr2_220229610_r 0 -
--- a/test-data/unix_grep_output2.html Sun Oct 06 08:22:36 2013 -0400 +++ b/test-data/unix_grep_output2.html Wed Jan 07 11:10:52 2015 -0500 @@ -1,9 +1,307 @@ -<html><body><pre> -GCTATAG<font color="blue"><b>AAATGT</b></font>TAACATCGAATGTACATTATAAC -CAGCTAACAATC<font color="blue"><b>AAGCGT</b></font>TACAGATTAGCCCCCCCC -GAACTTGCGTAACGTACAAAAATGCAAGCA<font color="blue"><b>AAAAGT</b></font> -GCTCTGTTAATCTAGA<font color="blue"><b>AAATGT</b></font>GTCTCCCCCCCCCC -<font color="blue"><b>AATCGT</b></font>ATAGCTCGGGCCGGATACTAGTACACCCCC -GATATAGTGGATAACTAATGCTCCCCCAG<font color="blue"><b>AACTGT</b></font>T -GAACGGACTATAGCCGGTATCCAAACAT<font color="blue"><b>AAATGT</b></font>TC -</pre></body></html> +<html> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/> +<style type="text/css"> +.ef0,.f0 { color: #000000; } .eb0,.b0 { background-color: #000000; } +.ef1,.f1 { color: #AA0000; } .eb1,.b1 { background-color: #AA0000; } +.ef2,.f2 { color: #00AA00; } .eb2,.b2 { background-color: #00AA00; } +.ef3,.f3 { color: #AA5500; } .eb3,.b3 { background-color: #AA5500; } +.ef4,.f4 { color: #0000AA; } .eb4,.b4 { background-color: #0000AA; } +.ef5,.f5 { color: #AA00AA; } .eb5,.b5 { background-color: #AA00AA; } +.ef6,.f6 { color: #00AAAA; } .eb6,.b6 { background-color: #00AAAA; } +.ef7,.f7 { color: #AAAAAA; } .eb7,.b7 { background-color: #AAAAAA; } +.ef8, .f0 > .bold,.bold > .f0 { color: #555555; font-weight: normal; } +.ef9, .f1 > .bold,.bold > .f1 { color: #FF5555; font-weight: normal; } +.ef10,.f2 > .bold,.bold > .f2 { color: #55FF55; font-weight: normal; } +.ef11,.f3 > .bold,.bold > .f3 { color: #FFFF55; font-weight: normal; } +.ef12,.f4 > .bold,.bold > .f4 { color: #5555FF; font-weight: normal; } +.ef13,.f5 > .bold,.bold > .f5 { color: #FF55FF; font-weight: normal; } +.ef14,.f6 > .bold,.bold > .f6 { color: #55FFFF; font-weight: normal; } +.ef15,.f7 > .bold,.bold > .f7 { color: #FFFFFF; font-weight: normal; } +.eb8 { background-color: #555555; } +.eb9 { 
background-color: #FF5555; } +.eb10 { background-color: #55FF55; } +.eb11 { background-color: #FFFF55; } +.eb12 { background-color: #5555FF; } +.eb13 { background-color: #FF55FF; } +.eb14 { background-color: #55FFFF; } +.eb15 { background-color: #FFFFFF; } +.ef16 { color: #000000; } .eb16 { background-color: #000000; } +.ef17 { color: #00005f; } .eb17 { background-color: #00005f; } +.ef18 { color: #000087; } .eb18 { background-color: #000087; } +.ef19 { color: #0000af; } .eb19 { background-color: #0000af; } +.ef20 { color: #0000d7; } .eb20 { background-color: #0000d7; } +.ef21 { color: #0000ff; } .eb21 { background-color: #0000ff; } +.ef22 { color: #005f00; } .eb22 { background-color: #005f00; } +.ef23 { color: #005f5f; } .eb23 { background-color: #005f5f; } +.ef24 { color: #005f87; } .eb24 { background-color: #005f87; } +.ef25 { color: #005faf; } .eb25 { background-color: #005faf; } +.ef26 { color: #005fd7; } .eb26 { background-color: #005fd7; } +.ef27 { color: #005fff; } .eb27 { background-color: #005fff; } +.ef28 { color: #008700; } .eb28 { background-color: #008700; } +.ef29 { color: #00875f; } .eb29 { background-color: #00875f; } +.ef30 { color: #008787; } .eb30 { background-color: #008787; } +.ef31 { color: #0087af; } .eb31 { background-color: #0087af; } +.ef32 { color: #0087d7; } .eb32 { background-color: #0087d7; } +.ef33 { color: #0087ff; } .eb33 { background-color: #0087ff; } +.ef34 { color: #00af00; } .eb34 { background-color: #00af00; } +.ef35 { color: #00af5f; } .eb35 { background-color: #00af5f; } +.ef36 { color: #00af87; } .eb36 { background-color: #00af87; } +.ef37 { color: #00afaf; } .eb37 { background-color: #00afaf; } +.ef38 { color: #00afd7; } .eb38 { background-color: #00afd7; } +.ef39 { color: #00afff; } .eb39 { background-color: #00afff; } +.ef40 { color: #00d700; } .eb40 { background-color: #00d700; } +.ef41 { color: #00d75f; } .eb41 { background-color: #00d75f; } +.ef42 { color: #00d787; } .eb42 { background-color: #00d787; } +.ef43 { 
color: #00d7af; } .eb43 { background-color: #00d7af; } +.ef44 { color: #00d7d7; } .eb44 { background-color: #00d7d7; } +.ef45 { color: #00d7ff; } .eb45 { background-color: #00d7ff; } +.ef46 { color: #00ff00; } .eb46 { background-color: #00ff00; } +.ef47 { color: #00ff5f; } .eb47 { background-color: #00ff5f; } +.ef48 { color: #00ff87; } .eb48 { background-color: #00ff87; } +.ef49 { color: #00ffaf; } .eb49 { background-color: #00ffaf; } +.ef50 { color: #00ffd7; } .eb50 { background-color: #00ffd7; } +.ef51 { color: #00ffff; } .eb51 { background-color: #00ffff; } +.ef52 { color: #5f0000; } .eb52 { background-color: #5f0000; } +.ef53 { color: #5f005f; } .eb53 { background-color: #5f005f; } +.ef54 { color: #5f0087; } .eb54 { background-color: #5f0087; } +.ef55 { color: #5f00af; } .eb55 { background-color: #5f00af; } +.ef56 { color: #5f00d7; } .eb56 { background-color: #5f00d7; } +.ef57 { color: #5f00ff; } .eb57 { background-color: #5f00ff; } +.ef58 { color: #5f5f00; } .eb58 { background-color: #5f5f00; } +.ef59 { color: #5f5f5f; } .eb59 { background-color: #5f5f5f; } +.ef60 { color: #5f5f87; } .eb60 { background-color: #5f5f87; } +.ef61 { color: #5f5faf; } .eb61 { background-color: #5f5faf; } +.ef62 { color: #5f5fd7; } .eb62 { background-color: #5f5fd7; } +.ef63 { color: #5f5fff; } .eb63 { background-color: #5f5fff; } +.ef64 { color: #5f8700; } .eb64 { background-color: #5f8700; } +.ef65 { color: #5f875f; } .eb65 { background-color: #5f875f; } +.ef66 { color: #5f8787; } .eb66 { background-color: #5f8787; } +.ef67 { color: #5f87af; } .eb67 { background-color: #5f87af; } +.ef68 { color: #5f87d7; } .eb68 { background-color: #5f87d7; } +.ef69 { color: #5f87ff; } .eb69 { background-color: #5f87ff; } +.ef70 { color: #5faf00; } .eb70 { background-color: #5faf00; } +.ef71 { color: #5faf5f; } .eb71 { background-color: #5faf5f; } +.ef72 { color: #5faf87; } .eb72 { background-color: #5faf87; } +.ef73 { color: #5fafaf; } .eb73 { background-color: #5fafaf; } +.ef74 { color: #5fafd7; 
} .eb74 { background-color: #5fafd7; } +.ef75 { color: #5fafff; } .eb75 { background-color: #5fafff; } +.ef76 { color: #5fd700; } .eb76 { background-color: #5fd700; } +.ef77 { color: #5fd75f; } .eb77 { background-color: #5fd75f; } +.ef78 { color: #5fd787; } .eb78 { background-color: #5fd787; } +.ef79 { color: #5fd7af; } .eb79 { background-color: #5fd7af; } +.ef80 { color: #5fd7d7; } .eb80 { background-color: #5fd7d7; } +.ef81 { color: #5fd7ff; } .eb81 { background-color: #5fd7ff; } +.ef82 { color: #5fff00; } .eb82 { background-color: #5fff00; } +.ef83 { color: #5fff5f; } .eb83 { background-color: #5fff5f; } +.ef84 { color: #5fff87; } .eb84 { background-color: #5fff87; } +.ef85 { color: #5fffaf; } .eb85 { background-color: #5fffaf; } +.ef86 { color: #5fffd7; } .eb86 { background-color: #5fffd7; } +.ef87 { color: #5fffff; } .eb87 { background-color: #5fffff; } +.ef88 { color: #870000; } .eb88 { background-color: #870000; } +.ef89 { color: #87005f; } .eb89 { background-color: #87005f; } +.ef90 { color: #870087; } .eb90 { background-color: #870087; } +.ef91 { color: #8700af; } .eb91 { background-color: #8700af; } +.ef92 { color: #8700d7; } .eb92 { background-color: #8700d7; } +.ef93 { color: #8700ff; } .eb93 { background-color: #8700ff; } +.ef94 { color: #875f00; } .eb94 { background-color: #875f00; } +.ef95 { color: #875f5f; } .eb95 { background-color: #875f5f; } +.ef96 { color: #875f87; } .eb96 { background-color: #875f87; } +.ef97 { color: #875faf; } .eb97 { background-color: #875faf; } +.ef98 { color: #875fd7; } .eb98 { background-color: #875fd7; } +.ef99 { color: #875fff; } .eb99 { background-color: #875fff; } +.ef100 { color: #878700; } .eb100 { background-color: #878700; } +.ef101 { color: #87875f; } .eb101 { background-color: #87875f; } +.ef102 { color: #878787; } .eb102 { background-color: #878787; } +.ef103 { color: #8787af; } .eb103 { background-color: #8787af; } +.ef104 { color: #8787d7; } .eb104 { background-color: #8787d7; } +.ef105 { color: #8787ff; } 
.eb105 { background-color: #8787ff; } +.ef106 { color: #87af00; } .eb106 { background-color: #87af00; } +.ef107 { color: #87af5f; } .eb107 { background-color: #87af5f; } +.ef108 { color: #87af87; } .eb108 { background-color: #87af87; } +.ef109 { color: #87afaf; } .eb109 { background-color: #87afaf; } +.ef110 { color: #87afd7; } .eb110 { background-color: #87afd7; } +.ef111 { color: #87afff; } .eb111 { background-color: #87afff; } +.ef112 { color: #87d700; } .eb112 { background-color: #87d700; } +.ef113 { color: #87d75f; } .eb113 { background-color: #87d75f; } +.ef114 { color: #87d787; } .eb114 { background-color: #87d787; } +.ef115 { color: #87d7af; } .eb115 { background-color: #87d7af; } +.ef116 { color: #87d7d7; } .eb116 { background-color: #87d7d7; } +.ef117 { color: #87d7ff; } .eb117 { background-color: #87d7ff; } +.ef118 { color: #87ff00; } .eb118 { background-color: #87ff00; } +.ef119 { color: #87ff5f; } .eb119 { background-color: #87ff5f; } +.ef120 { color: #87ff87; } .eb120 { background-color: #87ff87; } +.ef121 { color: #87ffaf; } .eb121 { background-color: #87ffaf; } +.ef122 { color: #87ffd7; } .eb122 { background-color: #87ffd7; } +.ef123 { color: #87ffff; } .eb123 { background-color: #87ffff; } +.ef124 { color: #af0000; } .eb124 { background-color: #af0000; } +.ef125 { color: #af005f; } .eb125 { background-color: #af005f; } +.ef126 { color: #af0087; } .eb126 { background-color: #af0087; } +.ef127 { color: #af00af; } .eb127 { background-color: #af00af; } +.ef128 { color: #af00d7; } .eb128 { background-color: #af00d7; } +.ef129 { color: #af00ff; } .eb129 { background-color: #af00ff; } +.ef130 { color: #af5f00; } .eb130 { background-color: #af5f00; } +.ef131 { color: #af5f5f; } .eb131 { background-color: #af5f5f; } +.ef132 { color: #af5f87; } .eb132 { background-color: #af5f87; } +.ef133 { color: #af5faf; } .eb133 { background-color: #af5faf; } +.ef134 { color: #af5fd7; } .eb134 { background-color: #af5fd7; } +.ef135 { color: #af5fff; } .eb135 { 
background-color: #af5fff; } +.ef136 { color: #af8700; } .eb136 { background-color: #af8700; } +.ef137 { color: #af875f; } .eb137 { background-color: #af875f; } +.ef138 { color: #af8787; } .eb138 { background-color: #af8787; } +.ef139 { color: #af87af; } .eb139 { background-color: #af87af; } +.ef140 { color: #af87d7; } .eb140 { background-color: #af87d7; } +.ef141 { color: #af87ff; } .eb141 { background-color: #af87ff; } +.ef142 { color: #afaf00; } .eb142 { background-color: #afaf00; } +.ef143 { color: #afaf5f; } .eb143 { background-color: #afaf5f; } +.ef144 { color: #afaf87; } .eb144 { background-color: #afaf87; } +.ef145 { color: #afafaf; } .eb145 { background-color: #afafaf; } +.ef146 { color: #afafd7; } .eb146 { background-color: #afafd7; } +.ef147 { color: #afafff; } .eb147 { background-color: #afafff; } +.ef148 { color: #afd700; } .eb148 { background-color: #afd700; } +.ef149 { color: #afd75f; } .eb149 { background-color: #afd75f; } +.ef150 { color: #afd787; } .eb150 { background-color: #afd787; } +.ef151 { color: #afd7af; } .eb151 { background-color: #afd7af; } +.ef152 { color: #afd7d7; } .eb152 { background-color: #afd7d7; } +.ef153 { color: #afd7ff; } .eb153 { background-color: #afd7ff; } +.ef154 { color: #afff00; } .eb154 { background-color: #afff00; } +.ef155 { color: #afff5f; } .eb155 { background-color: #afff5f; } +.ef156 { color: #afff87; } .eb156 { background-color: #afff87; } +.ef157 { color: #afffaf; } .eb157 { background-color: #afffaf; } +.ef158 { color: #afffd7; } .eb158 { background-color: #afffd7; } +.ef159 { color: #afffff; } .eb159 { background-color: #afffff; } +.ef160 { color: #d70000; } .eb160 { background-color: #d70000; } +.ef161 { color: #d7005f; } .eb161 { background-color: #d7005f; } +.ef162 { color: #d70087; } .eb162 { background-color: #d70087; } +.ef163 { color: #d700af; } .eb163 { background-color: #d700af; } +.ef164 { color: #d700d7; } .eb164 { background-color: #d700d7; } +.ef165 { color: #d700ff; } .eb165 { background-color: 
#d700ff; } +.ef166 { color: #d75f00; } .eb166 { background-color: #d75f00; } +.ef167 { color: #d75f5f; } .eb167 { background-color: #d75f5f; } +.ef168 { color: #d75f87; } .eb168 { background-color: #d75f87; } +.ef169 { color: #d75faf; } .eb169 { background-color: #d75faf; } +.ef170 { color: #d75fd7; } .eb170 { background-color: #d75fd7; } +.ef171 { color: #d75fff; } .eb171 { background-color: #d75fff; } +.ef172 { color: #d78700; } .eb172 { background-color: #d78700; } +.ef173 { color: #d7875f; } .eb173 { background-color: #d7875f; } +.ef174 { color: #d78787; } .eb174 { background-color: #d78787; } +.ef175 { color: #d787af; } .eb175 { background-color: #d787af; } +.ef176 { color: #d787d7; } .eb176 { background-color: #d787d7; } +.ef177 { color: #d787ff; } .eb177 { background-color: #d787ff; } +.ef178 { color: #d7af00; } .eb178 { background-color: #d7af00; } +.ef179 { color: #d7af5f; } .eb179 { background-color: #d7af5f; } +.ef180 { color: #d7af87; } .eb180 { background-color: #d7af87; } +.ef181 { color: #d7afaf; } .eb181 { background-color: #d7afaf; } +.ef182 { color: #d7afd7; } .eb182 { background-color: #d7afd7; } +.ef183 { color: #d7afff; } .eb183 { background-color: #d7afff; } +.ef184 { color: #d7d700; } .eb184 { background-color: #d7d700; } +.ef185 { color: #d7d75f; } .eb185 { background-color: #d7d75f; } +.ef186 { color: #d7d787; } .eb186 { background-color: #d7d787; } +.ef187 { color: #d7d7af; } .eb187 { background-color: #d7d7af; } +.ef188 { color: #d7d7d7; } .eb188 { background-color: #d7d7d7; } +.ef189 { color: #d7d7ff; } .eb189 { background-color: #d7d7ff; } +.ef190 { color: #d7ff00; } .eb190 { background-color: #d7ff00; } +.ef191 { color: #d7ff5f; } .eb191 { background-color: #d7ff5f; } +.ef192 { color: #d7ff87; } .eb192 { background-color: #d7ff87; } +.ef193 { color: #d7ffaf; } .eb193 { background-color: #d7ffaf; } +.ef194 { color: #d7ffd7; } .eb194 { background-color: #d7ffd7; } +.ef195 { color: #d7ffff; } .eb195 { background-color: #d7ffff; } +.ef196 
{ color: #ff0000; } .eb196 { background-color: #ff0000; } +.ef197 { color: #ff005f; } .eb197 { background-color: #ff005f; } +.ef198 { color: #ff0087; } .eb198 { background-color: #ff0087; } +.ef199 { color: #ff00af; } .eb199 { background-color: #ff00af; } +.ef200 { color: #ff00d7; } .eb200 { background-color: #ff00d7; } +.ef201 { color: #ff00ff; } .eb201 { background-color: #ff00ff; } +.ef202 { color: #ff5f00; } .eb202 { background-color: #ff5f00; } +.ef203 { color: #ff5f5f; } .eb203 { background-color: #ff5f5f; } +.ef204 { color: #ff5f87; } .eb204 { background-color: #ff5f87; } +.ef205 { color: #ff5faf; } .eb205 { background-color: #ff5faf; } +.ef206 { color: #ff5fd7; } .eb206 { background-color: #ff5fd7; } +.ef207 { color: #ff5fff; } .eb207 { background-color: #ff5fff; } +.ef208 { color: #ff8700; } .eb208 { background-color: #ff8700; } +.ef209 { color: #ff875f; } .eb209 { background-color: #ff875f; } +.ef210 { color: #ff8787; } .eb210 { background-color: #ff8787; } +.ef211 { color: #ff87af; } .eb211 { background-color: #ff87af; } +.ef212 { color: #ff87d7; } .eb212 { background-color: #ff87d7; } +.ef213 { color: #ff87ff; } .eb213 { background-color: #ff87ff; } +.ef214 { color: #ffaf00; } .eb214 { background-color: #ffaf00; } +.ef215 { color: #ffaf5f; } .eb215 { background-color: #ffaf5f; } +.ef216 { color: #ffaf87; } .eb216 { background-color: #ffaf87; } +.ef217 { color: #ffafaf; } .eb217 { background-color: #ffafaf; } +.ef218 { color: #ffafd7; } .eb218 { background-color: #ffafd7; } +.ef219 { color: #ffafff; } .eb219 { background-color: #ffafff; } +.ef220 { color: #ffd700; } .eb220 { background-color: #ffd700; } +.ef221 { color: #ffd75f; } .eb221 { background-color: #ffd75f; } +.ef222 { color: #ffd787; } .eb222 { background-color: #ffd787; } +.ef223 { color: #ffd7af; } .eb223 { background-color: #ffd7af; } +.ef224 { color: #ffd7d7; } .eb224 { background-color: #ffd7d7; } +.ef225 { color: #ffd7ff; } .eb225 { background-color: #ffd7ff; } +.ef226 { color: #ffff00; } 
.eb226 { background-color: #ffff00; } +.ef227 { color: #ffff5f; } .eb227 { background-color: #ffff5f; } +.ef228 { color: #ffff87; } .eb228 { background-color: #ffff87; } +.ef229 { color: #ffffaf; } .eb229 { background-color: #ffffaf; } +.ef230 { color: #ffffd7; } .eb230 { background-color: #ffffd7; } +.ef231 { color: #ffffff; } .eb231 { background-color: #ffffff; } +.ef232 { color: #080808; } .eb232 { background-color: #080808; } +.ef233 { color: #121212; } .eb233 { background-color: #121212; } +.ef234 { color: #1c1c1c; } .eb234 { background-color: #1c1c1c; } +.ef235 { color: #262626; } .eb235 { background-color: #262626; } +.ef236 { color: #303030; } .eb236 { background-color: #303030; } +.ef237 { color: #3a3a3a; } .eb237 { background-color: #3a3a3a; } +.ef238 { color: #444444; } .eb238 { background-color: #444444; } +.ef239 { color: #4e4e4e; } .eb239 { background-color: #4e4e4e; } +.ef240 { color: #585858; } .eb240 { background-color: #585858; } +.ef241 { color: #626262; } .eb241 { background-color: #626262; } +.ef242 { color: #6c6c6c; } .eb242 { background-color: #6c6c6c; } +.ef243 { color: #767676; } .eb243 { background-color: #767676; } +.ef244 { color: #808080; } .eb244 { background-color: #808080; } +.ef245 { color: #8a8a8a; } .eb245 { background-color: #8a8a8a; } +.ef246 { color: #949494; } .eb246 { background-color: #949494; } +.ef247 { color: #9e9e9e; } .eb247 { background-color: #9e9e9e; } +.ef248 { color: #a8a8a8; } .eb248 { background-color: #a8a8a8; } +.ef249 { color: #b2b2b2; } .eb249 { background-color: #b2b2b2; } +.ef250 { color: #bcbcbc; } .eb250 { background-color: #bcbcbc; } +.ef251 { color: #c6c6c6; } .eb251 { background-color: #c6c6c6; } +.ef252 { color: #d0d0d0; } .eb252 { background-color: #d0d0d0; } +.ef253 { color: #dadada; } .eb253 { background-color: #dadada; } +.ef254 { color: #e4e4e4; } .eb254 { background-color: #e4e4e4; } +.ef255 { color: #eeeeee; } .eb255 { background-color: #eeeeee; } + +.f9 { color: #000000; } +.b9 { 
background-color: #FFFFFF; } +.f9 > .bold,.bold > .f9, body.f9 > pre > .bold { + /* Bold is heavy black on white, or bright white + depending on the default background */ + color: #000000; + font-weight: bold; +} +.reverse { + /* CSS doesnt support swapping fg and bg colours unfortunately, + so just hardcode something that will look OK on all backgrounds. */ + color: #000000; background-color: #AAAAAA; +} +.underline { text-decoration: underline; } +.line-through { text-decoration: line-through; } +.blink { text-decoration: blink; } + +</style> +</head> + +<body class="f9 b9"> +<pre> +GCTATAG<span class="bold"><span class="f4">AAATGT</span></span>TAACATCGAATGTACATTATAAC +<span class="f6">--</span> +CAGCTAACAATC<span class="bold"><span class="f4">AAGCGT</span></span>TACAGATTAGCCCCCCCC +<span class="f6">--</span> +GAACTTGCGTAACGTACAAAAATGCAAGCA<span class="bold"><span class="f4">AAAAGT</span></span> +<span class="f6">--</span> +GCTCTGTTAATCTAGA<span class="bold"><span class="f4">AAATGT</span></span>GTCTCCCCCCCCCC +<span class="f6">--</span> +<span class="bold"><span class="f4">AATCGT</span></span>ATAGCTCGGGCCGGATACTAGTACACCCCC +<span class="f6">--</span> +GATATAGTGGATAACTAATGCTCCCCCAG<span class="bold"><span class="f4">AACTGT</span></span>T +<span class="f6">--</span> +GAACGGACTATAGCCGGTATCCAAACAT<span class="bold"><span class="f4">AAATGT</span></span>TC +</pre> +</body> +</html>
--- a/tool_dependencies.xml Sun Oct 06 08:22:36 2013 -0400 +++ b/tool_dependencies.xml Wed Jan 07 11:10:52 2015 -0500 @@ -1,18 +1,37 @@ <?xml version="1.0"?> <tool_dependency> - <package name="gnu_coreutils" version="8.21"> - <repository changeset_revision="83be2b421d3b" name="package_gnu_coreutils_8_21" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <package name="gnu_coreutils" version="8.22"> + <repository changeset_revision="b638666e399d" name="package_gnu_coreutils_8_22" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> <package name="gnu_awk" version="4.1.0"> - <repository changeset_revision="cbe9f1c8c98b" name="package_gnu_awk_4_1_0" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="cbe9f1c8c98b" name="package_gnu_awk_4_1_0" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> <package name="gnu_grep" version="2.14"> - <repository changeset_revision="af98f72cd785" name="package_gnu_grep_2_14" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="af98f72cd785" name="package_gnu_grep_2_14" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> <package name="gnu_sed" version="4.2.2-sandbox"> - <repository changeset_revision="4a4691c78042" name="package_gnu_sed_4_2_2_sandbox" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="4a4691c78042" name="package_gnu_sed_4_2_2_sandbox" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> <set_environment version="1.0"> <environment_variable action="set_to" name="TP_SCRIPT_PATH">$REPOSITORY_INSTALL_DIR</environment_variable> </set_environment> + <package name="perl" version="5.18.1"> + <repository changeset_revision="a1a111b9faa5" name="package_perl_5_18" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + </package> + <package 
name="text_processing_perl_packages" version="1.0"> + <install version="1.0"> + <actions> + <action type="setup_perl_environment"> + <repository changeset_revision="a1a111b9faa5" name="package_perl_5_18" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu"> + <package name="perl" version="5.18.1" /> + </repository> + <!-- Download and install a Perl package (the Sort-Key 1.33 distribution) from the CPAN mirror URL below. --> + <package>https://cpan.metacpan.org/authors/id/S/SA/SALVA/Sort-Key-1.33.tar.gz</package> + </action> + </actions> + </install> + <readme> + Needed perl packages. + </readme> + </package> </tool_dependency>
#!/usr/bin/env python
"""Unfold one delimited column of a tab-separated file into one row per value.

Usage::

    unfold_column.py INFILE COLUMN DELIMITER OUTFILE

INFILE     path of the tab-separated input file
COLUMN     1-based number of the column to unfold
DELIMITER  string separating the values inside that column
OUTFILE    path of the tab-separated output file (overwritten)

Every input row is written once per value found in COLUMN; all other
columns are repeated unchanged.
"""

import sys


def unfold(in_path, column, delimiter, out_path):
    """Write ``in_path`` to ``out_path`` with column ``column`` unfolded.

    :param in_path: path of the tab-separated input file.
    :param column: 1-based index of the column whose content is split.
    :param delimiter: separator passed to ``str.split`` for that column.
    :param out_path: path of the output file; created or truncated.
    :raises ValueError: if ``column`` is smaller than 1.
    :raises IndexError: if an input row has fewer than ``column`` columns.
    """
    if column < 1:
        raise ValueError('column must be >= 1, got %d' % column)
    # Convert once to a 0-based index instead of once per input line.
    index = column - 1

    # Both handles are context-managed so they are closed even on error.
    with open(in_path) as handle, open(out_path, 'w') as out:
        for line in handle:
            # Drop the line terminator before splitting. Otherwise it stays
            # glued to the last column, and when that column is the one being
            # unfolded only the final value would end with a newline, so the
            # other rows would run together on a single output line.
            cols = line.rstrip('\n').split('\t')
            before = cols[:index]
            after = cols[index + 1:]
            for elem in cols[index].split(delimiter):
                out.write('\t'.join(before + [elem] + after) + '\n')


if __name__ == '__main__':
    unfold(sys.argv[1], int(sys.argv[2]), sys.argv[3], sys.argv[4])
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/unfold_column.xml Wed Jan 07 11:10:52 2015 -0500 @@ -0,0 +1,63 @@ +<tool id="tp_unfold_column_tool" name="Unfold" version="@BASE_VERSION@.0"> + <description>columns from a table</description> <!-- Galaxy wrapper around unfold_column.py: splits the values of one column and emits one output row per value. --> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> <!-- The command below passes four positional arguments in the order unfold_column.py reads them from sys.argv: input file, column number, delimiter, output file. --> + <command interpreter="python"> +<![CDATA[ + unfold_column.py + '${infile}' + $column + "$delimiter" + '${outfile}' +]]> + </command> + <inputs> + <param name="infile" format="tabular" type="data" label="File to unfold" /> + <param name="column" type="data_column" data_ref="infile" accept_default="true" label="Column to use for unfolding" /> + <param name="delimiter" type="select" label="Values in column are delimited by"> + <option value=" ">Whitespace</option> + <option value=".">Dot</option> + <option value=",">Comma</option> + <option value="-">Dash</option> + <option value="_">Underscore</option> + <option value="|">Pipe</option> + </param> + </inputs> + <outputs> + <data format="tabular" name="outfile" /> + </outputs> + <tests> <!-- Single regression test: unfolds column 3 of unfold_column1.tabular using the first delimiter option and compares against unfold_column_result1.tabular. --> + <test> + <param name="infile" value="unfold_column1.tabular" ftype="tabular"/> + <param name="delimiter" value=" "/> + <param name="column" value="3"/> + <output name="outfile" file="unfold_column_result1.tabular" ftype="tabular"/> + </test> + </tests> + <help> +<![CDATA[ +**What it does** + +This tool will unfold one column of your input dataset. + +----- + +Input Example:: + + a b 1,2,3,4,5 c + + +Output Example:: + + a b 1 c + a b 2 c + a b 3 c + a b 4 c + a b 5 c + +@REFERENCES@ +]]> + </help> +</tool>
--- a/unsorted_uniq.py Sun Oct 06 08:22:36 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ -import sys -import subprocess - -""" - We only need that file because galaxy do not understand the -t $'\t' term. - Otherwise that would be the right XML-only solution: - sort -u - $ignore_case - $is_numeric - -t \$'\t' - #if $adv_opts.adv_opts_selector=="advanced": - -k$adv_opts.column_start,$adv_opts.column_end - #end if - -o $outfile - $input -""" - -if sys.argv[1].strip() != 'false': - ignore_case = sys.argv[1] -else: - ignore_case = '' - -if sys.argv[2].strip() != 'false': - is_numeric = sys.argv[2] -else: - is_numeric = '' - -try: - col_start = sys.argv[3] - col_end = sys.argv[4] - com = "sort -u %s %s -t ' ' -k%s,%s -o %s %s" % (is_numeric, ignore_case, col_start, col_end, sys.argv[5], sys.argv[6]) -except: - # no advanced options selected - com = "sort -u %s %s -t ' ' -o %s %s" % (is_numeric, ignore_case, sys.argv[3], sys.argv[4]) - -subprocess.call(com, shell=True)
--- a/unsorted_uniq.xml Sun Oct 06 08:22:36 2013 -0400 +++ b/unsorted_uniq.xml Wed Jan 07 11:10:52 2015 -0500 @@ -1,44 +1,57 @@ -<tool id="tp_sorted_uniq" name="Unique" version="0.3"> +<tool id="tp_sorted_uniq" name="Unique" version="@BASE_VERSION@.0"> <description>occurrences of each record</description> - <requirements> - <requirement type="package" version="8.21">gnu_coreutils</requirement> - </requirements> - <command interpreter='python'> - unsorted_uniq.py - $ignore_case - $is_numeric - #if $adv_opts.adv_opts_selector=="advanced": - $adv_opts.column_start - $adv_opts.column_end - #end if - $outfile - $infile + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <version_command>sort --version | head -n 1</version_command> <!-- The command runs "sort -u" directly: optional -f and -n flags come from the two boolean params, and -k start,end is added only when the advanced options are selected. NOTE(review): the -t separator should be one literal TAB character for tabular input; verify it was not converted to a space. --> + <command> +<![CDATA[ + sort -u + $ignore_case + $is_numeric + -t ' ' + #if $adv_opts.adv_opts_selector == "advanced": + -k$adv_opts.column_start,$adv_opts.column_end + #end if + -o "$outfile" + "$infile" +]]> </command> <inputs> - <param name="infile" type="data" format="tabular,txt" label="File to scan for unique values" /> - <param name="ignore_case" type="boolean" label="ignore differences in case when comparing (-f)" truevalue="-f" falsevalue="false" checked="false"/> - <param name="is_numeric" type="boolean" label="column only contains numeric values (-n)" truevalue="-n" falsevalue="false" checked="false" /> + <param name="infile" type="data" format="tabular" label="File to scan for unique values" /> + <param name="ignore_case" type="boolean" truevalue="-f" falsevalue="" checked="False" + label="Ignore differences in case when comparing" help="(-f)"/> + <param name="is_numeric" type="boolean" truevalue="-n" falsevalue="" checked="False" + label="Column only contains numeric values" help="(-n)" /> <conditional name="adv_opts"> <param name="adv_opts_selector" type="select" label="Advanced Options"> - <option value="basic" selected="True">Hide Advanced Options</option> - <option value="advanced">Show 
Advanced Options</option> + <option value="basic" selected="True">Hide Advanced Options</option> + <option value="advanced">Show Advanced Options</option> </param> <when value="basic" /> <when value="advanced"> - <param name="column_start" label="Column start" type="data_column" data_ref="infile" help="Unique on specific column range"/> - <param name="column_end" label="Column end" type="data_column" data_ref="infile" help="Unique on specific column range"/> + <param name="column_start" type="data_column" data_ref="infile" label="Column start" help="Unique on specific column range"/> + <param name="column_end" type="data_column" data_ref="infile" label="Column end" help="Unique on specific column range"/> </when> </conditional> </inputs> <outputs> - <data format="infile" name="outfile" metadata_source="infile"/> + <data format_source="infile" name="outfile" metadata_source="infile"/> <!-- format_source makes the output inherit the datatype of the "infile" param explicitly, instead of relying on the legacy format="input" keyword. --> </outputs> <tests> <test> + <param name="infile" value="1.bed"/> + <param name="is_numeric" value="True"/> + <param name="ignore_case" value="True"/> + <param name="adv_opts_selector" value="advanced"/> + <param name="column_start" value="2"/> + <param name="column_end" value="3"/> + <output name="outfile" file="unique_results1.bed"/> </test> </tests> <help> - +<![CDATA[ .. class:: infomark **Syntax** @@ -74,6 +87,7 @@ chr2 1000 1900 gene5 chr3 15 1656 gene6 - +@REFERENCES@ +]]> </help> </tool>