Mercurial > repos > bgruening > text_processing
changeset 1:a4ad586d1403 draft
Uploaded
author | bgruening |
---|---|
date | Thu, 05 Sep 2013 11:42:27 -0400 |
parents | ec66f9d90ef0 |
children | fc862d5bccaf |
files | awk.xml cut.xml find_and_replace.xml grep.xml head.xml multijoin.xml readme.rst sed.xml sort.xml sorted_uniq.xml tail.xml test-data/1.bed test-data/eq-cut.dat test-data/eq-showbeginning.dat test-data/eq-showtail.dat test-data/sort_in1.bed test-data/sort_in2.bed test-data/sort_out1.bed test-data/sort_out2.bed test-data/sort_out3.bed unsorted_uniq.xml |
diffstat | 21 files changed, 560 insertions(+), 82 deletions(-) [+] |
line wrap: on
line diff
--- a/awk.xml Thu Sep 05 04:58:21 2013 -0400 +++ b/awk.xml Thu Sep 05 11:42:27 2013 -0400 @@ -1,5 +1,5 @@ -<tool id="unixtools_awk_tool" name="Awk" version="0.1.1"> - <description></description> +<tool id="unixtools_awk_tool" name="Data-reformatting" version="0.1.1"> + <description>(awk)</description> <requirements> <requirement type="package" version="4.1.0">gnu_awk</requirement> </requirements> @@ -26,15 +26,14 @@ </test> </tests> <outputs> - <data format="input" name="output" metadata_source="input1" - /> + <data format="input" name="output" metadata_source="input"/> </outputs> <configfiles> <configfile name="awk_script"> $url_paste </configfile> </configfiles> -<help> + <help> **What it does** @@ -42,7 +41,10 @@ .. class:: infomark -**TIP:** This tool uses the **extended regular** expression syntax (not the perl syntax). +**TIP:** + +This tool uses the **extended regular** expression syntax (not the perl syntax). +**\\d**, **\\w**, **\\s** etc. are **not** supported. **Further reading** @@ -65,8 +67,6 @@ - - **Pattern Examples** - **$2 == "chr3"** will match lines whose second column is the string 'chr3' @@ -87,12 +87,6 @@ - - - - - - **AWK's Regular Expression Syntax** The select tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern descibing a certain amount of text. @@ -118,7 +112,5 @@ - **\|** Separates alternate possibilities. -**Note**: AWK uses extended regular expression syntax, not Perl syntax. **\\d**, **\\w**, **\\s** etc. are **not** supported. - -</help> + </help> </tool>
--- a/cut.xml Thu Sep 05 04:58:21 2013 -0400 +++ b/cut.xml Thu Sep 05 11:42:27 2013 -0400 @@ -1,10 +1,10 @@ -<tool id="unixtools_cut_tool" name="cut" version="0.1.1"> - <description>columns from files</description> +<tool id="unixtools_cut_tool" name="Cut" version="0.1.1"> + <description>columns from a table</description> <requirements> <requirement type="package" version="8.21">gnu_coreutils</requirement> </requirements> <command> - cut ${complement} ${cutwhat} '${list}' '${input}' > '${output}' + cut -d"${delimiter}" ${complement} ${cut_element} '${list}' '${input}' > '${output}' </command> <inputs> @@ -14,7 +14,17 @@ <option value="--complement">Discard</option> </param> - <param name="cutwhat" type="select" label="Cut by"> + <param name="delimiter" type="select" label="Delimited by"> + <option value="">Tab</option> + <option value=" ">Whitespace</option> + <option value=".">Dot</option> + <option value=",">Comma</option> + <option value="-">Dash</option> + <option value="_">Underscore</option> + <option value="|">Pipe</option> + </param> + + <param name="cut_element" type="select" label="Cut by"> <option value="-f">fields</option> <option value="-c">characters</option> <option value="-b">bytes</option> @@ -28,27 +38,145 @@ </sanitizer> </param> </inputs> + <outputs> + <!--<data format="tabular" name="output" />--> + <!-- WIP, not sure that will work with the complement option --> + <data format="tabular" name="output"> + <actions> + <conditional name="cut_element"> + <when value="-f"> + <conditional name="delimiter"> + <when value="T"> + <conditional name="input"> + <when datatype_isinstance="interval"> + <action type="format" default="tabular"> + <option type="from_param" name="list" column="0" offset="0"> <!-- chromCol is 1--> + <filter type="insert_column" column="0" value="interval"/> + <filter type="insert_column" ref="list" /> <!-- startCol --> + <filter type="insert_column" ref="list" /> <!-- endCol --> + + <filter type="multiple_splitter" column="1" separator=","/> + <filter type="column_strip" column="1"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="1" name="lower" /> + <filter type="param_value" column="1" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="1" strip="c"/> <!-- get rid of c's --> + <filter type="boolean" column="1" cast="int" /> + + <filter type="multiple_splitter" column="2" separator=","/> + <filter type="column_strip" column="2"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="2" name="lower" /> + <filter type="param_value" column="2" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="2" strip="c"/> <!-- get rid of c's --> + <filter type="boolean" column="2" cast="int" /> + + <filter type="multiple_splitter" column="3" separator=","/> + <filter type="column_strip" column="3"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="3" name="lower" /> + <filter type="param_value" column="3" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="3" strip="c"/> <!-- get rid of c's --> + <filter type="boolean" column="3" cast="int" /> + + <filter type="metadata_value" ref="input" name="chromCol" column="1" /> + <filter type="metadata_value" ref="input" name="startCol" column="2" /> + <filter type="metadata_value" ref="input" name="endCol" column="3" /> + + </option> + </action> + + <conditional name="output"> + <when datatype_isinstance="interval"> + <action type="metadata" name="chromCol"> + <option type="from_param" name="list" column="0" offset="0"> <!-- chromCol is 0--> + <filter type="multiple_splitter" column="0" separator=","/> + <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="0" name="lower" /> + <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> + <filter type="insert_column" value="1" iterate="True" column="0"/> + <filter type="boolean" column="1" cast="int" /> + <filter type="metadata_value" ref="input" name="chromCol" column="1" /> + </option> + </action> + + <action type="metadata" name="startCol"> + <option type="from_param" name="list" column="0" offset="0"> <!-- startCol is 0--> + <filter type="multiple_splitter" column="0" separator=","/> + <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="0" name="lower" /> + <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> + <filter type="insert_column" value="1" iterate="True" column="0"/> + <filter type="boolean" column="1" cast="int" /> + <filter type="metadata_value" ref="input" name="startCol" column="1" /> + </option> + </action> + + <action type="metadata" name="endCol"> + <option type="from_param" name="list" column="0" offset="0"> <!-- endCol is 0--> + <filter type="multiple_splitter" column="0" separator=","/> + <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="0" name="lower" /> + <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> + <filter type="insert_column" value="1" iterate="True" column="0"/> + <filter type="boolean" column="1" cast="int" /> + <filter type="metadata_value" ref="input" name="endCol" column="1" /> + </option> + </action> + + <action type="metadata" name="nameCol" default="0"> + <option type="from_param" name="list" column="0" offset="0"> <!-- nameCol is 0--> + <filter type="multiple_splitter" column="0" separator=","/> + <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="0" name="lower" /> + <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> + <filter type="insert_column" value="1" iterate="True" column="0"/> + <filter type="boolean" column="1" cast="int" /> + <filter type="metadata_value" ref="input" name="nameCol" column="1" /> + </option> + </action> + + <action type="metadata" name="strandCol" default="0"> + <option type="from_param" name="list" column="0" offset="0"> <!-- strandCol is 0--> + <filter type="multiple_splitter" column="0" separator=","/> + <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="0" name="lower" /> + <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> + <filter type="insert_column" value="1" iterate="True" column="0"/> + <filter type="boolean" column="1" cast="int" /> + <filter type="metadata_value" ref="input" name="strandCol" column="1" /> + </option> + </action> + </when> + </conditional> + + </when> + </conditional> + </when> + </conditional> + </when> + </conditional> + </actions> + </data> + </outputs> <tests> <test> - <param name="input" value="unix_cut_input1.txt" /> - <output name="output" file="unix_cut_output1.txt" /> - <param name="complement" value="Keep" /> - <param name="cutwhat" value="fields" /> - <param name="list" value="1,3,4" /> + <param name="list" value="1,4,2,3"/> + <param name="delimiter" value="T"/> + <param name="input" value="1.bed"/> + <output name="output" file="eq-cut.dat"/> </test> <test> - <param name="input" value="unix_cut_input1.txt" /> - <output name="output" file="unix_cut_output1.txt" /> - <param name="complement" value="Discard" /> - <param name="cutwhat" value="fields" /> - <param name="list" value="2" /> + <param name="list" value="1,4,2-3" /> + <param name="delimiter" value="T" /> + <param name="input" value="1.bed" /> + <output name="output" file="eq-cut.dat" /> </test> </tests> - <outputs> - <data format="input" name="output" metadata_source="input" /> - </outputs> <help> **What it does** @@ -68,8 +196,6 @@ **-8** - Cut from the first to the eight field/characters. - - Input Example:: fruit color price weight
--- a/find_and_replace.xml Thu Sep 05 04:58:21 2013 -0400 +++ b/find_and_replace.xml Thu Sep 05 11:42:27 2013 -0400 @@ -45,7 +45,7 @@ help="Select this option if the first line contains column headers. Text in the line will not be replaced. " /> <conditional name="searchwhere"> - <param name="choice" type="select" label="Replace text in"> + <param name="choice" type="select" label="Find and Replace text in"> <option value="line" selected="true">entire line</option> <option value="column">specific column</option> </param>
--- a/grep.xml Thu Sep 05 04:58:21 2013 -0400 +++ b/grep.xml Thu Sep 05 11:42:27 2013 -0400 @@ -6,7 +6,7 @@ <requirement type="set_environment">UNIX_TOOLS_SCRIPT_PATH</requirement> </requirements> <command interpreter="sh"> - #if $color = "COLOR": + #if $color == "COLOR": GREP_COLOR='1;34' grep --color=always -P "$@" -- "${url_paste}" '${input}' | \$UNIX_TOOLS_SCRIPT_PATH/ansi2html.sh > "${output}" #else: grep -P "$@" -- "${url_paste}" '${input}' | grep -v "^--$" > "${output}"
--- a/head.xml Thu Sep 05 04:58:21 2013 -0400 +++ b/head.xml Thu Sep 05 11:42:27 2013 -0400 @@ -8,7 +8,7 @@ </command> <inputs> - <param format="txt" name="infile" type="data" label="file to cut" /> + <param format="txt" name="infile" type="data" label="File to select" /> <param name="complement" type="select" label="Operation"> <option value="">Keep first lines</option> @@ -27,11 +27,36 @@ <outputs> <data format="input" name="outfile" metadata_source="infile"/> </outputs> + <tests> + <test> + <param name="count" value="10"/> + <param name="infile" value="1.bed"/> + <output name="out_file1" file="eq-showbeginning.dat"/> + </test> + </tests> <help> **What it does** -This tool runs the **head** unix command, which discards lines from the end of a file. +This tool outputs specified number of lines from the **beginning** of a dataset + +----- + +**Example** + +Selecting 2 lines from this:: + + chr7 56632 56652 D17003_CTCF_R6 310 + + chr7 56736 56756 D17003_CTCF_R7 354 + + chr7 56761 56781 D17003_CTCF_R4 220 + + chr7 56772 56792 D17003_CTCF_R7 372 + + chr7 56775 56795 D17003_CTCF_R4 207 + + +will produce:: + + chr7 56632 56652 D17003_CTCF_R6 310 + + chr7 56736 56756 D17003_CTCF_R7 354 + + </help> </tool>
--- a/multijoin.xml Thu Sep 05 04:58:21 2013 -0400 +++ b/multijoin.xml Thu Sep 05 11:42:27 2013 -0400 @@ -14,7 +14,7 @@ </command> <inputs> - <repeat name="files" title="file to join"> + <repeat name="files" title="file to join" min="2"> <param name="filename" label="Add file" type="data" format="txt" /> </repeat> @@ -43,7 +43,7 @@ </inputs> <outputs> - <data name="output" format="input" metadata_source="input1" /> + <data name="output" format="input" metadata_source="input" /> </outputs> <help>
--- a/readme.rst Thu Sep 05 04:58:21 2013 -0400 +++ b/readme.rst Thu Sep 05 11:42:27 2013 -0400 @@ -79,6 +79,8 @@ - also shuf will get a major improved performance with large files http://git.savannah.gnu.org/gitweb/?p=coreutils.git;a=commit;h=20d7bce0f7e57d9a98f0ee811e31c757e9fedfff we can remove the random feature from sort and use shuf instead - move some advanced settings under a conditional, for example the cut tools offers to cut bytes +- cut wrapper has some output conditional magic for interval files, that needs to be checked +- comm wrapper, see the Galaxy default one
--- a/sed.xml Thu Sep 05 04:58:21 2013 -0400 +++ b/sed.xml Thu Sep 05 11:42:27 2013 -0400 @@ -3,24 +3,38 @@ <requirements> <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement> </requirements> - <command> - sed --sandbox -r $silent -f '$sed_script' '$input' > '$output' - </command> - <inputs> - <param format="txt" name="input" type="data" label="File to process" /> + <command> + sed --sandbox -r + + #if $adv_opts.adv_opts_selector == 'advanced': + $adv_opts.silent + #end if + -f '$sed_script' '$input' > '$output' + </command> + <inputs> + <param format="txt" name="input" type="data" label="File to process" /> - <param name="url_paste" type="text" area="true" size="5x35" label="SED Program" help=""> - <sanitizer> - <valid initial="string.printable"> - <remove value="'"/> - </valid> - </sanitizer> - </param> + <param name="url_paste" type="text" area="true" size="5x35" label="SED Program" help=""> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> - <param name="silent" type="select" label="operation mode" help="(Same as 'sed -n', leave at 'normal' unless you know what you're doing)" > - <option value="">normal</option> - <option value="-n">silent</option> - </param> + <conditional name="adv_opts"> + <param name="adv_opts_selector" type="select" label="Advanced Options"> + <option value="basic" selected="True">Hide Advanced Options</option> + <option value="advanced">Show Advanced Options</option> + </param> + <when value="basic" /> + <when value="advanced"> + <param name="silent" type="select" label="operation mode" help="(Same as 'sed -n', leave at 'normal' unless you know what you're doing)" > + <option value="">normal</option> + <option value="-n">silent</option> + </param> + </when> + </conditional> </inputs> <configfiles>
--- a/sort.xml Thu Sep 05 04:58:21 2013 -0400 +++ b/sort.xml Thu Sep 05 11:42:27 2013 -0400 @@ -1,4 +1,5 @@ <tool id="unixtools_sort_header_tool" name="Sort" version="0.1.1"> + <description>data in ascending or descending order</description> <requirements> <requirement type="package" version="8.21">gnu_coreutils</requirement> <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement> @@ -25,16 +26,11 @@ <inputs> <param format="txt" name="infile" type="data" label="Sort Query" /> - <param name="header" type="integer" size="5" value="1" label="Number of header lines" help="These will be ignored during sort."> + <param name="header" type="integer" size="5" value="0" label="Number of header lines" help="These will be ignored during sort."> <validator type="in_range" message="Negative values are not allowed." min="0"/> </param> - <param name="unique" type="boolean" checked="false" truevalue="--unique" falsevalue="" - label="Output unique values" help="Print only unique values (based on sorted key columns. See help section for details." /> - - <param name="ignore_case" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Ignore case" help="Sort and Join key column values regardless of upper/lower case letters." /> - - <repeat name="sortkeys" title="sort key"> + <repeat name="sortkeys" title="Column selections" min="1"> <param name="column" label="on column" type="data_column" data_ref="infile" accept_default="true" /> <param name="order" type="select" display="radio" label="in"> <option value="">Ascending order</option> @@ -49,12 +45,46 @@ <option value="R">Random order</option> </param> </repeat> + + <param name="unique" type="boolean" checked="false" truevalue="--unique" falsevalue="" + label="Output unique values" help="Print only unique values (based on sorted key columns. See help section for details." /> + + <param name="ignore_case" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Ignore case" help="Sort and Join key column values regardless of upper/lower case letters." /> + </inputs> - <tests> - </tests> <outputs> <data format="input" name="outfile" metadata_source="infile"/> </outputs> + <tests> + <!-- anyone knows how to write tests with repeat tags --> + <test> + <param name="infile" value="sort_in1.bed"/> + <param name="column" value="1"/> + <param name="style" value=""/> + <param name="order" value="ASC"/> + <param name="other_column" value="3"/> + <param name="other_style" value="n"/> + <param name="other_order" value="r"/> + <output name="out_file1" file="sort_out1.bed"/> + </test> + <test> + <param name="infile" value="sort_in1.bed"/> + <param name="column" value="1"/> + <param name="style" value=""/> + <param name="order" value="ASC"/> + <param name="other_column" value="3"/> + <param name="other_style" value="n"/> + <param name="other_order" value=""/> + <output name="out_file1" file="sort_out2.bed"/> + </test> + <test> + <param name="infile" value="sort_in2.bed"/> + <param name="column" value="5"/> + <param name="style" value="g"/> + <param name="order" value=""/> + <output name="out_file1" file="sort_out3.bed"/> + </test> + </tests> <help> **What it does**
--- a/sorted_uniq.xml Thu Sep 05 04:58:21 2013 -0400 +++ b/sorted_uniq.xml Thu Sep 05 11:42:27 2013 -0400 @@ -1,7 +1,8 @@ <tool id="unixtools_uniq_tool" name="Unique lines"> - <description>from sorted file</description> + <description>assuming sorted input file</description> <requirements> <requirement type="package" version="8.21">gnu_coreutils</requirement> + <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement> </requirements> <command> uniq @@ -15,17 +16,23 @@ ## feature is not yet released, it will be in the next 8.22 version ##--group=$group + + #if $count: + # count will print the count with spaces infrontof the line and + # with a space (not a tab) after the number, we need to cahnge that + | sed -e 's/ *//' -e 's/ /\t/' > $output + #end if > $output </command> <inputs> - <param format="txt" name="input" type="data" label="File to scan for unique values" help="Make sure you have sorted this file" /> + <param format="txt,tabular" name="input" type="data" label="File to scan for unique values" help="Make sure you have sorted this file" /> - <param name="count" type="boolean" label="count [-c]" help="Prefix lines by the number of occurrences" truevalue="-c" falsevalue="" /> - <param name="repeated" type="boolean" label="repeated [-d]" help="Only print duplicate lines" truevalue="-d" falsevalue="" /> - <param name="ignorecase" type="boolean" label="ignore case [-i]" help="Ignore differences in case when comparing" truevalue="-i" falsevalue="" /> - <param name="uniqueonly" type="boolean" label="unique only [-u]" help="Only print unique lines" truevalue="-u" falsevalue="" /> - <param name="skipfields" type="integer" label="skip fields [-f]" help="Avoid comparing the first N fields. (use zero to start from the first field)" size="2" value="0" /> + <param name="count" type="boolean" label="Counting number of occurrences [-c]" help="Prefix lines by the number of occurrences" truevalue="-c" falsevalue="" /> + <param name="repeated" type="boolean" label="Only print duplicate lines [-d]" truevalue="-d" falsevalue="" /> + <param name="ignorecase" type="boolean" label="Ignore differences in case when comparing [-i]" truevalue="-i" falsevalue="" /> + <param name="uniqueonly" type="boolean" label="Only print unique lines [-u]" checked="True" truevalue="-u" falsevalue="" /> + <param name="skipfields" type="integer" label="Avoid comparing the first N fields [-f]" help="Use zero to start from the first field" size="2" value="0" /> <!-- <param name="group" type="select" label="Output all lines, and delimit each unique group.">
--- a/tail.xml Thu Sep 05 04:58:21 2013 -0400 +++ b/tail.xml Thu Sep 05 11:42:27 2013 -0400 @@ -4,22 +4,46 @@ <requirement type="package" version="8.21">gnu_coreutils</requirement> </requirements> <command> - tail --lines $count '$input1' > '$output' + tail --lines $count '$input' > '$output' </command> <inputs> - <param format="txt" name="input1" type="data" label="file to cut" /> + <param format="txt" name="input" type="data" label="file to cut" /> <param name="count" type="integer" size="5" value="10" label="Output last X lines" help="" /> </inputs> <outputs> - <data format="input" name="output" metadata_source="input1"/> + <data format="input" name="output" metadata_source="input"/> </outputs> + <tests> + <test> + <param name="count" value="10"/> + <param name="infile" value="1.bed"/> + <output name="out_file1" file="eq-showtail.dat"/> + </test> + </tests> <help> **What it does** -This tool runs the **tail** unix command, which discards lines from the beginning of a file. +This tool outputs specified number of lines from the **end** of a dataset + +----- + +**Example** + +- Input File:: + + chr7 57134 57154 D17003_CTCF_R7 356 - + chr7 57247 57267 D17003_CTCF_R4 207 + + chr7 57314 57334 D17003_CTCF_R5 269 + + chr7 57341 57361 D17003_CTCF_R7 375 + + chr7 57457 57477 D17003_CTCF_R3 188 + + +- Show last two lines of above file. The result is:: + + chr7 57341 57361 D17003_CTCF_R7 375 + + chr7 57457 57477 D17003_CTCF_R3 188 + </help> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1.bed Thu Sep 05 11:42:27 2013 -0400 @@ -0,0 +1,65 @@ +chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 - +chr1 147984545 147984630 CCDS990.1_cds_0_0_chr1_147984546_f 0 + +chr1 148078400 148078582 CCDS993.1_cds_0_0_chr1_148078401_r 0 - +chr1 148185136 148185276 CCDS996.1_cds_0_0_chr1_148185137_f 0 + +chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 - +chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 - +chr11 116206508 116206563 CCDS8377.1_cds_0_0_chr11_116206509_f 0 + +chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 - +chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - +chr13 112381694 112381953 CCDS9526.1_cds_0_0_chr13_112381695_f 0 + +chr14 98710240 98712285 CCDS9949.1_cds_0_0_chr14_98710241_r 0 - +chr15 41486872 41487060 CCDS10096.1_cds_0_0_chr15_41486873_r 0 - +chr15 41673708 41673857 CCDS10097.1_cds_0_0_chr15_41673709_f 0 + +chr15 41679161 41679250 CCDS10098.1_cds_0_0_chr15_41679162_r 0 - +chr15 41826029 41826196 CCDS10101.1_cds_0_0_chr15_41826030_f 0 + +chr16 142908 143003 CCDS10397.1_cds_0_0_chr16_142909_f 0 + +chr16 179963 180135 CCDS10401.1_cds_0_0_chr16_179964_r 0 - +chr16 244413 244681 CCDS10402.1_cds_0_0_chr16_244414_f 0 + +chr16 259268 259383 CCDS10403.1_cds_0_0_chr16_259269_r 0 - +chr18 23786114 23786321 CCDS11891.1_cds_0_0_chr18_23786115_r 0 - +chr18 59406881 59407046 CCDS11985.1_cds_0_0_chr18_59406882_f 0 + +chr18 59455932 59456337 CCDS11986.1_cds_0_0_chr18_59455933_r 0 - +chr18 59600586 59600754 CCDS11988.1_cds_0_0_chr18_59600587_f 0 + +chr19 59068595 59069564 CCDS12866.1_cds_0_0_chr19_59068596_f 0 + +chr19 59236026 59236146 CCDS12872.1_cds_0_0_chr19_59236027_r 0 - +chr19 59297998 59298008 CCDS12877.1_cds_0_0_chr19_59297999_f 0 + +chr19 59302168 59302288 CCDS12878.1_cds_0_0_chr19_59302169_r 0 - +chr2 118288583 118288668 CCDS2120.1_cds_0_0_chr2_118288584_f 0 + +chr2 118394148 118394202 CCDS2121.1_cds_0_0_chr2_118394149_r 0 - +chr2 220190202 220190242 CCDS2441.1_cds_0_0_chr2_220190203_f 0 + +chr2 220229609 220230869 CCDS2443.1_cds_0_0_chr2_220229610_r 0 - +chr20 33330413 33330423 CCDS13249.1_cds_0_0_chr20_33330414_r 0 - +chr20 33513606 33513792 CCDS13255.1_cds_0_0_chr20_33513607_f 0 + +chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 - +chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 + +chr21 32707032 32707192 CCDS13614.1_cds_0_0_chr21_32707033_f 0 + +chr21 32869641 32870022 CCDS13615.1_cds_0_0_chr21_32869642_r 0 - +chr21 33321040 33322012 CCDS13620.1_cds_0_0_chr21_33321041_f 0 + +chr21 33744994 33745040 CCDS13625.1_cds_0_0_chr21_33744995_r 0 - +chr22 30120223 30120265 CCDS13897.1_cds_0_0_chr22_30120224_f 0 + +chr22 30160419 30160661 CCDS13898.1_cds_0_0_chr22_30160420_r 0 - +chr22 30665273 30665360 CCDS13901.1_cds_0_0_chr22_30665274_f 0 + +chr22 30939054 30939266 CCDS13903.1_cds_0_0_chr22_30939055_r 0 - +chr5 131424298 131424460 CCDS4149.1_cds_0_0_chr5_131424299_f 0 + +chr5 131556601 131556672 CCDS4151.1_cds_0_0_chr5_131556602_r 0 - +chr5 131621326 131621419 CCDS4152.1_cds_0_0_chr5_131621327_f 0 + +chr5 131847541 131847666 CCDS4155.1_cds_0_0_chr5_131847542_r 0 - +chr6 108299600 108299744 CCDS5061.1_cds_0_0_chr6_108299601_r 0 - +chr6 108594662 108594687 CCDS5063.1_cds_0_0_chr6_108594663_f 0 + +chr6 108640045 108640151 CCDS5064.1_cds_0_0_chr6_108640046_r 0 - +chr6 108722976 108723115 CCDS5067.1_cds_0_0_chr6_108722977_f 0 + +chr7 113660517 113660685 CCDS5760.1_cds_0_0_chr7_113660518_f 0 + +chr7 116512159 116512389 CCDS5771.1_cds_0_0_chr7_116512160_r 0 - +chr7 116714099 116714152 CCDS5773.1_cds_0_0_chr7_116714100_f 0 + +chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - +chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - +chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + +chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - +chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + +chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - +chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + +chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - +chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + +chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/eq-cut.dat Thu Sep 05 11:42:27 2013 -0400 @@ -0,0 +1,65 @@ +chr1 CCDS989.1_cds_0_0_chr1_147962193_r 147962192 147962580 +chr1 CCDS990.1_cds_0_0_chr1_147984546_f 147984545 147984630 +chr1 CCDS993.1_cds_0_0_chr1_148078401_r 148078400 148078582 +chr1 CCDS996.1_cds_0_0_chr1_148185137_f 148185136 148185276 +chr10 CCDS7248.1_cds_0_0_chr10_55251624_r 55251623 55253124 +chr11 CCDS8374.1_cds_0_0_chr11_116124408_r 116124407 116124501 +chr11 CCDS8377.1_cds_0_0_chr11_116206509_f 116206508 116206563 +chr11 CCDS8378.1_cds_0_0_chr11_116211734_r 116211733 116212337 +chr11 CCDS7726.1_cds_0_0_chr11_1812378_f 1812377 1812407 +chr12 CCDS8736.1_cds_0_0_chr12_38440095_r 38440094 38440321 +chr13 CCDS9526.1_cds_0_0_chr13_112381695_f 112381694 112381953 +chr14 CCDS9949.1_cds_0_0_chr14_98710241_r 98710240 98712285 +chr15 CCDS10096.1_cds_0_0_chr15_41486873_r 41486872 41487060 +chr15 CCDS10097.1_cds_0_0_chr15_41673709_f 41673708 41673857 +chr15 CCDS10098.1_cds_0_0_chr15_41679162_r 41679161 41679250 +chr15 CCDS10101.1_cds_0_0_chr15_41826030_f 41826029 41826196 +chr16 CCDS10397.1_cds_0_0_chr16_142909_f 142908 143003 +chr16 CCDS10401.1_cds_0_0_chr16_179964_r 179963 180135 +chr16 CCDS10402.1_cds_0_0_chr16_244414_f 244413 244681 +chr16 CCDS10403.1_cds_0_0_chr16_259269_r 259268 259383 +chr18 CCDS11891.1_cds_0_0_chr18_23786115_r 23786114 23786321 +chr18 CCDS11985.1_cds_0_0_chr18_59406882_f 59406881 59407046 +chr18 CCDS11986.1_cds_0_0_chr18_59455933_r 59455932 59456337 +chr18 CCDS11988.1_cds_0_0_chr18_59600587_f 59600586 59600754 +chr19 CCDS12866.1_cds_0_0_chr19_59068596_f 59068595 59069564 +chr19 CCDS12872.1_cds_0_0_chr19_59236027_r 59236026 59236146 +chr19 CCDS12877.1_cds_0_0_chr19_59297999_f 59297998 59298008 +chr19 CCDS12878.1_cds_0_0_chr19_59302169_r 59302168 59302288 +chr2 CCDS2120.1_cds_0_0_chr2_118288584_f 118288583 118288668 +chr2 CCDS2121.1_cds_0_0_chr2_118394149_r 118394148 118394202 +chr2 CCDS2441.1_cds_0_0_chr2_220190203_f 220190202 220190242 +chr2 CCDS2443.1_cds_0_0_chr2_220229610_r 220229609 220230869 +chr20 CCDS13249.1_cds_0_0_chr20_33330414_r 33330413 33330423 +chr20 CCDS13255.1_cds_0_0_chr20_33513607_f 33513606 33513792 +chr20 CCDS13256.1_cds_0_0_chr20_33579501_r 33579500 33579527 +chr20 CCDS13257.1_cds_0_0_chr20_33593261_f 33593260 33593348 +chr21 CCDS13614.1_cds_0_0_chr21_32707033_f 32707032 32707192 +chr21 CCDS13615.1_cds_0_0_chr21_32869642_r 32869641 32870022 +chr21 CCDS13620.1_cds_0_0_chr21_33321041_f 33321040 33322012 +chr21 CCDS13625.1_cds_0_0_chr21_33744995_r 33744994 33745040 +chr22 CCDS13897.1_cds_0_0_chr22_30120224_f 30120223 30120265 +chr22 CCDS13898.1_cds_0_0_chr22_30160420_r 30160419 30160661 +chr22 CCDS13901.1_cds_0_0_chr22_30665274_f 30665273 30665360 +chr22 CCDS13903.1_cds_0_0_chr22_30939055_r 30939054 30939266 +chr5 CCDS4149.1_cds_0_0_chr5_131424299_f 131424298 131424460 +chr5 CCDS4151.1_cds_0_0_chr5_131556602_r 131556601 131556672 +chr5 CCDS4152.1_cds_0_0_chr5_131621327_f 131621326 131621419 +chr5 CCDS4155.1_cds_0_0_chr5_131847542_r 131847541 131847666 +chr6 CCDS5061.1_cds_0_0_chr6_108299601_r 108299600 108299744 +chr6 CCDS5063.1_cds_0_0_chr6_108594663_f 108594662 108594687 +chr6 CCDS5064.1_cds_0_0_chr6_108640046_r 108640045 108640151 +chr6 CCDS5067.1_cds_0_0_chr6_108722977_f 108722976 108723115 +chr7 CCDS5760.1_cds_0_0_chr7_113660518_f 113660517 113660685 +chr7 CCDS5771.1_cds_0_0_chr7_116512160_r 116512159 116512389 +chr7 CCDS5773.1_cds_0_0_chr7_116714100_f 116714099 116714152 +chr7 CCDS5774.1_cds_0_0_chr7_116945542_r 116945541 116945787 +chr8 CCDS6324.1_cds_0_0_chr8_118881132_r 118881131 118881317 +chr9 CCDS6914.1_cds_0_0_chr9_128764157_f 128764156 128764189 +chr9 CCDS6915.1_cds_0_0_chr9_128787520_r 128787519 128789136 +chr9 CCDS6917.1_cds_0_0_chr9_128882428_f 128882427 128882523 +chr9 CCDS6919.1_cds_0_0_chr9_128937230_r 128937229 128937445 +chrX CCDS14606.1_cds_0_0_chrX_122745048_f 122745047 122745924 +chrX CCDS14733.1_cds_0_0_chrX_152648965_r 152648964 152649196 +chrX CCDS14735.1_cds_0_0_chrX_152691447_f 152691446 152691471 +chrX CCDS14736.1_cds_0_0_chrX_152694030_r 152694029 152694263
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/eq-showbeginning.dat Thu Sep 05 11:42:27 2013 -0400 @@ -0,0 +1,10 @@ +chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 - +chr1 147984545 147984630 CCDS990.1_cds_0_0_chr1_147984546_f 0 + +chr1 148078400 148078582 CCDS993.1_cds_0_0_chr1_148078401_r 0 - +chr1 148185136 148185276 CCDS996.1_cds_0_0_chr1_148185137_f 0 + +chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 - +chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 - +chr11 116206508 116206563 CCDS8377.1_cds_0_0_chr11_116206509_f 0 + +chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 - +chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/eq-showtail.dat Thu Sep 05 11:42:27 2013 -0400 @@ -0,0 +1,10 @@ +chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - +chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - +chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + +chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - +chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + +chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - +chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + +chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - +chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + +chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_in1.bed Thu Sep 05 11:42:27 2013 -0400 @@ -0,0 +1,32 @@ +# comment 1 \n\n''" again +# comment 2 **}"''' special +# comment 3 @n/n""" characters +chr6 108298214 108386086 NM_007214 0 - 108299600 108385906 0 21 1530,105,99,102,159,174,60,83,148,155,93,133,95,109,51,59,62,113,115,100,304, 0,2490,6246,10831,12670,23164,23520,27331,31052,32526,34311,36130,36365,38609,41028,42398,43048,51479,54500,59097,87568, +chr6 108593954 108616704 NM_003269 0 + 108594662 108615360 0 9 733,146,88,236,147,97,150,106,1507, 0,5400,8778,10445,12037,14265,14749,15488,21243, +chr6 108639410 108689143 NM_152827 0 - 108640045 108688818 0 3 741,125,487, 0,2984,49246, +chr6 108722790 108950942 NM_145315 0 + 108722976 108950321 0 13 325,224,52,102,131,100,59,83,71,101,141,114,750, 0,28931,52094,60760,61796,71339,107102,152319,181970,182297,215317,224802,227402, +chr7 113320332 113924911 AK131266 0 + 113862563 113893433 0 20 285,91,178,90,58,75,138,51,201,178,214,105,88,84,77,102,122,70,164,1124, 0,201692,340175,448290,451999,484480,542213,543265,543478,545201,556083,558358,565876,567599,573029,573245,575738,577123,577946,603455, +chr7 116511232 116557294 NM_003391 0 - 116512159 116556994 0 5 1157,265,278,227,383, 0,20384,37843,43339,45679, +chr7 116713967 116902666 NM_000492 0 + 116714099 116901113 0 27 185,111,109,216,90,164,126,247,93,183,192,95,87,724,129,38,251,80,151,228,101,249,156,90,173,106,1754, 0,24290,29071,50936,54313,55285,56585,60137,62053,68678,79501,107776,110390,111971,114967,122863,123569,126711,130556,131618,134650,147559,162475,172879,184725,185496,186945, +chr7 116944658 117107512 AF377960 0 - 116945541 116979926 0 23 1129,102,133,64,186,206,179,188,153,100,87,80,96,276,118,255,151,100,204,1654,225,108,173, 0,7364,8850,10413,13893,14398,17435,24259,24615,35177,35359,45901,47221,49781,56405,66857,69787,72208,73597,80474,100111,150555,162681, +chr2 118288484 118306183 NM_006773 0 + 118288583 118304530 0 14 184,285,144,136,101,200,115,140,162,153,114,57,178,1796, 0,2765,4970,6482,6971,7183,7468,9890,10261,10768,11590,14270,14610,15903, +chr2 118389378 118390700 BC005078 0 - 118390395 118390500 0 1 1322, 0, +chr8 118880786 119193239 NM_000127 0 - 118881131 119192466 0 11 531,172,161,90,96,119,133,120,108,94,1735, 0,5355,7850,13505,19068,20309,23098,30863,36077,37741,310718, +chrX 122719582 122773357 NM_001167 0 + 122745047 122766566 0 7 96,909,100,79,43,201,6985, 0,25433,28421,31040,32533,40295,46790, +chr9 128763240 128783870 NM_174933 0 + 128764156 128783586 0 12 261,118,74,159,76,48,56,63,129,117,127,370, 0,522,875,5630,12374,12603,15040,15175,18961,19191,20037,20260, +chr9 128787362 128789566 NM_014908 0 - 128787519 128789136 0 1 2204, 0, +chr9 128789530 128848928 NM_015354 0 + 128789552 128848511 0 44 54,55,74,85,81,45,93,120,212,115,201,90,66,120,127,153,127,88,77,115,121,67,129,140,107,207,170,70,68,196,78,86,146,182,201,93,159,138,75,228,132,74,130,594, 0,1491,5075,8652,9254,10312,11104,11317,20808,21702,23060,25462,31564,32908,33566,34851,35204,35595,35776,37202,38860,39111,39891,40349,42422,45499,45827,46675,47158,47621,50453,50840,51474,51926,53831,54186,55119,55619,57449,57605,57947,58352,58541,58804, +chr9 128849867 128870133 NM_020145 0 - 128850516 128869987 0 11 757,241,101,90,24,63,93,134,129,142,209, 0,1071,1736,2085,2635,4201,6376,6736,13056,14247,20057, +chr5 131170738 131357870 AF099740 0 - 131311206 131357817 0 31 112,124,120,81,65,40,120,129,61,88,94,79,72,102,144,117,89,73,96,135,135,78,74,52,33,179,100,102,65,115,248, 0,11593,44117,47607,104668,109739,114675,126366,135488,137518,138009,140437,152389,153373,155388,159269,160793,162981,164403,165577,166119,167611,169501,178260,179675,180901,181658,182260,182953,183706,186884, +chr5 131424245 131426795 NM_000588 0 + 131424298 131426383 0 5 215,42,90,42,535, 0,313,1658,1872,2015, +chr5 131556201 131590458 NM_004199 0 - 131556601 131582218 0 15 471,97,69,66,54,100,71,177,194,240,138,152,97,100,170, 0,2316,2802,5596,6269,11138,11472,15098,16528,17674,21306,24587,25142,25935,34087, +chr5 131621285 131637046 NM_003687 0 + 131621326 131635821 0 7 134,152,82,179,164,118,1430, 0,4915,8770,13221,13609,14097,14331, +chr1 147962006 147975713 NM_005997 0 - 147962192 147975670 0 6 574,145,177,115,153,160, 0,1543,7859,9048,9340,13547, +chr1 147984101 148035079 BC007833 0 + 147984545 148033414 0 14 529,32,81,131,118,153,300,206,84,49,85,130,46,1668, 0,25695,28767,33118,33695,33998,35644,38005,39629,40577,41402,43885,48367,49310, +chr1 148077485 148111797 NM_002651 0 - 148078400 148111728 0 12 1097,121,133,266,124,105,110,228,228,45,937,77, 0,2081,2472,6871,9907,10257,11604,14199,15637,18274,23636,34235, +chr1 148185113 148187485 NM_002796 0 + 148185136 148187378 0 7 163,207,147,82,117,89,120, 0,416,877,1199,1674,1977,2252, +chrX 152648233 152662158 NM_000425 0 - 152648964 152662138 0 28 963,12,73,135,156,120,174,123,202,116,223,71,198,111,125,157,167,112,144,132,185,112,171,123,203,106,11,100, 0,1436,1545,1951,2390,2653,2889,3156,3367,3772,4717,5122,5424,5868,6066,6370,6629,6909,7588,7871,8124,8456,8858,9125,10220,10660,11296,13825, +chrX 152691216 152693487 NM_000054 0 + 152691446 152693029 0 3 255,885,664, 0,616,1607, +chrX 152693677 152712545 NM_001666 0 - 152694029 152712503 0 22 586,100,93,184,74,234,106,135,78,61,103,28,85,192,102,222,129,183,63,163,205,109, 0,1693,2066,2364,2635,2794,3129,3323,3545,3752,5323,5647,5841,6032,6401,11455,11778,13249,13719,13987,14227,18759, +chr2 220108603 220116964 NM_001927 0 + 220108689 220116217 0 9 664,61,96,162,126,221,44,83,789, 0,1718,1874,2118,2451,2963,5400,7286,7572, +chr2 220229182 220233943 NM_024536 0 - 220229609 220233765 0 4 1687,180,574,492, 0,1990,2660,4269,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_in2.bed Thu Sep 05 11:42:27 2013 -0400 @@ -0,0 +1,6 @@ +chr10 100 200 feature1 100.01 + +chr20 800 900 feature2 1.1 + +chr2 500 600 feature3 1000.1 + +chr1 300 400 feature4 1.1e-05 + +chr21 300 500 feature5 1.1e2 + +chr15 700 800 feature6 1.1e4 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_out1.bed Thu Sep 05 11:42:27 2013 -0400 @@ -0,0 +1,32 @@ +# comment 1 \n\n''" again +# comment 2 **}"''' special +# comment 3 @n/n""" characters +chr1 148185113 148187485 NM_002796 0 + 148185136 148187378 0 7 163,207,147,82,117,89,120, 0,416,877,1199,1674,1977,2252, +chr1 148077485 148111797 NM_002651 0 - 148078400 148111728 0 12 1097,121,133,266,124,105,110,228,228,45,937,77, 0,2081,2472,6871,9907,10257,11604,14199,15637,18274,23636,34235, +chr1 147984101 148035079 BC007833 0 + 147984545 148033414 0 14 529,32,81,131,118,153,300,206,84,49,85,130,46,1668, 0,25695,28767,33118,33695,33998,35644,38005,39629,40577,41402,43885,48367,49310, +chr1 147962006 147975713 NM_005997 0 - 147962192 147975670 0 6 574,145,177,115,153,160, 0,1543,7859,9048,9340,13547, +chr2 220229182 220233943 NM_024536 0 - 220229609 220233765 0 4 1687,180,574,492, 0,1990,2660,4269, +chr2 220108603 220116964 NM_001927 0 + 220108689 220116217 0 9 664,61,96,162,126,221,44,83,789, 0,1718,1874,2118,2451,2963,5400,7286,7572, +chr2 118389378 118390700 BC005078 0 - 118390395 118390500 0 1 1322, 0, +chr2 118288484 118306183 NM_006773 0 + 118288583 118304530 0 14 184,285,144,136,101,200,115,140,162,153,114,57,178,1796, 0,2765,4970,6482,6971,7183,7468,9890,10261,10768,11590,14270,14610,15903, +chr5 131621285 131637046 NM_003687 0 + 131621326 131635821 0 7 134,152,82,179,164,118,1430, 0,4915,8770,13221,13609,14097,14331, +chr5 131556201 131590458 NM_004199 0 - 131556601 131582218 0 15 471,97,69,66,54,100,71,177,194,240,138,152,97,100,170, 0,2316,2802,5596,6269,11138,11472,15098,16528,17674,21306,24587,25142,25935,34087, +chr5 131424245 131426795 NM_000588 0 + 131424298 131426383 0 5 215,42,90,42,535, 0,313,1658,1872,2015, +chr5 131170738 131357870 AF099740 0 - 131311206 131357817 0 31 112,124,120,81,65,40,120,129,61,88,94,79,72,102,144,117,89,73,96,135,135,78,74,52,33,179,100,102,65,115,248, 0,11593,44117,47607,104668,109739,114675,126366,135488,137518,138009,140437,152389,153373,155388,159269,160793,162981,164403,165577,166119,167611,169501,178260,179675,180901,181658,182260,182953,183706,186884, +chr6 108722790 108950942 NM_145315 0 + 108722976 108950321 0 13 325,224,52,102,131,100,59,83,71,101,141,114,750, 0,28931,52094,60760,61796,71339,107102,152319,181970,182297,215317,224802,227402, +chr6 108639410 108689143 NM_152827 0 - 108640045 108688818 0 3 741,125,487, 0,2984,49246, +chr6 108593954 108616704 NM_003269 0 + 108594662 108615360 0 9 733,146,88,236,147,97,150,106,1507, 0,5400,8778,10445,12037,14265,14749,15488,21243, +chr6 108298214 108386086 NM_007214 0 - 108299600 108385906 0 21 1530,105,99,102,159,174,60,83,148,155,93,133,95,109,51,59,62,113,115,100,304, 0,2490,6246,10831,12670,23164,23520,27331,31052,32526,34311,36130,36365,38609,41028,42398,43048,51479,54500,59097,87568, +chr7 116944658 117107512 AF377960 0 - 116945541 116979926 0 23 1129,102,133,64,186,206,179,188,153,100,87,80,96,276,118,255,151,100,204,1654,225,108,173, 0,7364,8850,10413,13893,14398,17435,24259,24615,35177,35359,45901,47221,49781,56405,66857,69787,72208,73597,80474,100111,150555,162681, +chr7 116713967 116902666 NM_000492 0 + 116714099 116901113 0 27 185,111,109,216,90,164,126,247,93,183,192,95,87,724,129,38,251,80,151,228,101,249,156,90,173,106,1754, 0,24290,29071,50936,54313,55285,56585,60137,62053,68678,79501,107776,110390,111971,114967,122863,123569,126711,130556,131618,134650,147559,162475,172879,184725,185496,186945, +chr7 116511232 116557294 NM_003391 0 - 116512159 116556994 0 5 1157,265,278,227,383, 0,20384,37843,43339,45679, +chr7 113320332 113924911 AK131266 0 + 113862563 113893433 0 20 285,91,178,90,58,75,138,51,201,178,214,105,88,84,77,102,122,70,164,1124, 0,201692,340175,448290,451999,484480,542213,543265,543478,545201,556083,558358,565876,567599,573029,573245,575738,577123,577946,603455, +chr8 118880786 119193239 NM_000127 0 - 118881131 119192466 0 11 531,172,161,90,96,119,133,120,108,94,1735, 0,5355,7850,13505,19068,20309,23098,30863,36077,37741,310718, +chr9 128849867 128870133 NM_020145 0 - 128850516 128869987 0 11 757,241,101,90,24,63,93,134,129,142,209, 0,1071,1736,2085,2635,4201,6376,6736,13056,14247,20057, +chr9 128789530 128848928 NM_015354 0 + 128789552 128848511 0 44 54,55,74,85,81,45,93,120,212,115,201,90,66,120,127,153,127,88,77,115,121,67,129,140,107,207,170,70,68,196,78,86,146,182,201,93,159,138,75,228,132,74,130,594, 0,1491,5075,8652,9254,10312,11104,11317,20808,21702,23060,25462,31564,32908,33566,34851,35204,35595,35776,37202,38860,39111,39891,40349,42422,45499,45827,46675,47158,47621,50453,50840,51474,51926,53831,54186,55119,55619,57449,57605,57947,58352,58541,58804, +chr9 128787362 128789566 NM_014908 0 - 128787519 128789136 0 1 2204, 0, +chr9 128763240 128783870 NM_174933 0 + 128764156 128783586 0 12 261,118,74,159,76,48,56,63,129,117,127,370, 0,522,875,5630,12374,12603,15040,15175,18961,19191,20037,20260, +chrX 152693677 152712545 NM_001666 0 - 152694029 152712503 0 22 586,100,93,184,74,234,106,135,78,61,103,28,85,192,102,222,129,183,63,163,205,109, 0,1693,2066,2364,2635,2794,3129,3323,3545,3752,5323,5647,5841,6032,6401,11455,11778,13249,13719,13987,14227,18759, +chrX 152691216 152693487 NM_000054 0 + 152691446 152693029 0 3 255,885,664, 0,616,1607, +chrX 152648233 152662158 NM_000425 0 - 152648964 152662138 0 28 963,12,73,135,156,120,174,123,202,116,223,71,198,111,125,157,167,112,144,132,185,112,171,123,203,106,11,100, 0,1436,1545,1951,2390,2653,2889,3156,3367,3772,4717,5122,5424,5868,6066,6370,6629,6909,7588,7871,8124,8456,8858,9125,10220,10660,11296,13825, +chrX 122719582 122773357 NM_001167 0 + 122745047 122766566 0 7 96,909,100,79,43,201,6985, 0,25433,28421,31040,32533,40295,46790,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_out2.bed Thu Sep 05 11:42:27 2013 -0400 @@ -0,0 +1,32 @@ +# comment 1 \n\n''" again +# comment 2 **}"''' special +# comment 3 @n/n""" characters +chr1 147962006 147975713 NM_005997 0 - 147962192 147975670 0 6 574,145,177,115,153,160, 0,1543,7859,9048,9340,13547, +chr1 147984101 148035079 BC007833 0 + 147984545 148033414 0 14 529,32,81,131,118,153,300,206,84,49,85,130,46,1668, 0,25695,28767,33118,33695,33998,35644,38005,39629,40577,41402,43885,48367,49310, +chr1 148077485 148111797 NM_002651 0 - 148078400 148111728 0 12 1097,121,133,266,124,105,110,228,228,45,937,77, 0,2081,2472,6871,9907,10257,11604,14199,15637,18274,23636,34235, +chr1 148185113 148187485 NM_002796 0 + 148185136 148187378 0 7 163,207,147,82,117,89,120, 0,416,877,1199,1674,1977,2252, +chr2 118288484 118306183 NM_006773 0 + 118288583 118304530 0 14 184,285,144,136,101,200,115,140,162,153,114,57,178,1796, 0,2765,4970,6482,6971,7183,7468,9890,10261,10768,11590,14270,14610,15903, +chr2 118389378 118390700 BC005078 0 - 118390395 118390500 0 1 1322, 0, +chr2 220108603 220116964 NM_001927 0 + 220108689 220116217 0 9 664,61,96,162,126,221,44,83,789, 0,1718,1874,2118,2451,2963,5400,7286,7572, +chr2 220229182 220233943 NM_024536 0 - 220229609 220233765 0 4 1687,180,574,492, 0,1990,2660,4269, +chr5 131170738 131357870 AF099740 0 - 131311206 131357817 0 31 112,124,120,81,65,40,120,129,61,88,94,79,72,102,144,117,89,73,96,135,135,78,74,52,33,179,100,102,65,115,248, 0,11593,44117,47607,104668,109739,114675,126366,135488,137518,138009,140437,152389,153373,155388,159269,160793,162981,164403,165577,166119,167611,169501,178260,179675,180901,181658,182260,182953,183706,186884, +chr5 131424245 131426795 NM_000588 0 + 131424298 131426383 0 5 215,42,90,42,535, 0,313,1658,1872,2015, +chr5 131556201 131590458 NM_004199 0 - 131556601 131582218 0 15 471,97,69,66,54,100,71,177,194,240,138,152,97,100,170, 0,2316,2802,5596,6269,11138,11472,15098,16528,17674,21306,24587,25142,25935,34087, +chr5 131621285 131637046 NM_003687 0 + 131621326 131635821 0 7 134,152,82,179,164,118,1430, 0,4915,8770,13221,13609,14097,14331, +chr6 108298214 108386086 NM_007214 0 - 108299600 108385906 0 21 1530,105,99,102,159,174,60,83,148,155,93,133,95,109,51,59,62,113,115,100,304, 0,2490,6246,10831,12670,23164,23520,27331,31052,32526,34311,36130,36365,38609,41028,42398,43048,51479,54500,59097,87568, +chr6 108593954 108616704 NM_003269 0 + 108594662 108615360 0 9 733,146,88,236,147,97,150,106,1507, 0,5400,8778,10445,12037,14265,14749,15488,21243, +chr6 108639410 108689143 NM_152827 0 - 108640045 108688818 0 3 741,125,487, 0,2984,49246, +chr6 108722790 108950942 NM_145315 0 + 108722976 108950321 0 13 325,224,52,102,131,100,59,83,71,101,141,114,750, 0,28931,52094,60760,61796,71339,107102,152319,181970,182297,215317,224802,227402, +chr7 113320332 113924911 AK131266 0 + 113862563 113893433 0 20 285,91,178,90,58,75,138,51,201,178,214,105,88,84,77,102,122,70,164,1124, 0,201692,340175,448290,451999,484480,542213,543265,543478,545201,556083,558358,565876,567599,573029,573245,575738,577123,577946,603455, +chr7 116511232 116557294 NM_003391 0 - 116512159 116556994 0 5 1157,265,278,227,383, 0,20384,37843,43339,45679, +chr7 116713967 116902666 NM_000492 0 + 116714099 116901113 0 27 185,111,109,216,90,164,126,247,93,183,192,95,87,724,129,38,251,80,151,228,101,249,156,90,173,106,1754, 0,24290,29071,50936,54313,55285,56585,60137,62053,68678,79501,107776,110390,111971,114967,122863,123569,126711,130556,131618,134650,147559,162475,172879,184725,185496,186945, +chr7 116944658 117107512 AF377960 0 - 116945541 116979926 0 23 1129,102,133,64,186,206,179,188,153,100,87,80,96,276,118,255,151,100,204,1654,225,108,173, 0,7364,8850,10413,13893,14398,17435,24259,24615,35177,35359,45901,47221,49781,56405,66857,69787,72208,73597,80474,100111,150555,162681, +chr8 118880786 119193239 NM_000127 0 - 118881131 119192466 0 11 531,172,161,90,96,119,133,120,108,94,1735, 0,5355,7850,13505,19068,20309,23098,30863,36077,37741,310718, +chr9 128763240 128783870 NM_174933 0 + 128764156 128783586 0 12 261,118,74,159,76,48,56,63,129,117,127,370, 0,522,875,5630,12374,12603,15040,15175,18961,19191,20037,20260, +chr9 128787362 128789566 NM_014908 0 - 128787519 128789136 0 1 2204, 0, +chr9 128789530 128848928 NM_015354 0 + 128789552 128848511 0 44 54,55,74,85,81,45,93,120,212,115,201,90,66,120,127,153,127,88,77,115,121,67,129,140,107,207,170,70,68,196,78,86,146,182,201,93,159,138,75,228,132,74,130,594, 0,1491,5075,8652,9254,10312,11104,11317,20808,21702,23060,25462,31564,32908,33566,34851,35204,35595,35776,37202,38860,39111,39891,40349,42422,45499,45827,46675,47158,47621,50453,50840,51474,51926,53831,54186,55119,55619,57449,57605,57947,58352,58541,58804, +chr9 128849867 128870133 NM_020145 0 - 128850516 128869987 0 11 757,241,101,90,24,63,93,134,129,142,209, 0,1071,1736,2085,2635,4201,6376,6736,13056,14247,20057, +chrX 122719582 122773357 NM_001167 0 + 122745047 122766566 0 7 96,909,100,79,43,201,6985, 0,25433,28421,31040,32533,40295,46790, +chrX 152648233 152662158 NM_000425 0 - 152648964 152662138 0 28 963,12,73,135,156,120,174,123,202,116,223,71,198,111,125,157,167,112,144,132,185,112,171,123,203,106,11,100, 0,1436,1545,1951,2390,2653,2889,3156,3367,3772,4717,5122,5424,5868,6066,6370,6629,6909,7588,7871,8124,8456,8858,9125,10220,10660,11296,13825, +chrX 152691216 152693487 NM_000054 0 + 152691446 152693029 0 3 255,885,664, 0,616,1607, +chrX 152693677 152712545 NM_001666 0 - 152694029 152712503 0 22 586,100,93,184,74,234,106,135,78,61,103,28,85,192,102,222,129,183,63,163,205,109, 0,1693,2066,2364,2635,2794,3129,3323,3545,3752,5323,5647,5841,6032,6401,11455,11778,13249,13719,13987,14227,18759,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_out3.bed Thu Sep 05 11:42:27 2013 -0400 @@ -0,0 +1,6 @@ +chr1 300 400 feature4 1.1e-05 + +chr20 800 900 feature2 1.1 + +chr10 100 200 feature1 100.01 + +chr21 300 500 feature5 1.1e2 + +chr2 500 600 feature3 1000.1 + +chr15 700 800 feature6 1.1e4 +
--- a/unsorted_uniq.xml Thu Sep 05 04:58:21 2013 -0400 +++ b/unsorted_uniq.xml Thu Sep 05 11:42:27 2013 -0400 @@ -4,7 +4,7 @@ <requirement type="package" version="8.21">gnu_coreutils</requirement> </requirements> <command interpreter='python'> - unique_lines.py + unsorted_uniq.py $ignore_case $is_numeric #if $adv_opts.adv_opts_selector=="advanced": @@ -15,9 +15,9 @@ $infile </command> <inputs> - <param name="infile" type="data" format="tabular,text" label="from query" /> - <param name="ignore_case" type="boolean" label="ignore differences in case when comparing (-f)" truevalue="-f" falsevalue="false" checked="false" help="ignore differences in case when comparing"/> - <param name="is_numeric" type="boolean" label="column only contains numeric values (-n)" truevalue="-n" falsevalue="false" checked="false" help="did the calumn have numeric values"/> + <param name="infile" type="data" format="tabular,txt" label="File to scan for unique values" /> + <param name="ignore_case" type="boolean" label="ignore differences in case when comparing (-f)" truevalue="-f" falsevalue="false" checked="false"/> + <param name="is_numeric" type="boolean" label="column only contains numeric values (-n)" truevalue="-n" falsevalue="false" checked="false" /> <conditional name="adv_opts"> <param name="adv_opts_selector" type="select" label="Advanced Options"> <option value="basic" selected="True">Hide Advanced Options</option>