annotate find_and_replace.xml @ 0:ec66f9d90ef0 draft

initial uploaded
author bgruening
date Thu, 05 Sep 2013 04:58:21 -0400
parents
children a4ad586d1403
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
1 <tool id="cshl_find_and_replace" name="Find and Replace" version="0.1.1">
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
2 <description>text</description>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
3 <command interpreter="perl">
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
4 find_and_replace
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
5 #if $searchwhere.choice == "column":
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
6 -c $searchwhere.column
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
7 #end if
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
8 -o $output
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
9 $caseinsensitive
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
10 $wholewords
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
11 $skip_first_line
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
12 $is_regex
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
13 '$url_paste'
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
14 '$file_data'
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
15 '$input'
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
16 </command>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
17 <inputs>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
18 <param format="txt" name="input" type="data" label="File to process" />
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
19 <param name="url_paste" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without the enclosing slashes // ) " >
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
20 <sanitizer>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
21 <valid initial="string.printable">
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
22 <remove value="&apos;"/>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
23 </valid>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
24 </sanitizer>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
25 </param>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
26
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
27 <param name="file_data" type="text" size="20" label="Replace with" help="Use simple text, or $&amp; (dollar-ampersand) and $1 $2 $3 to refer to matched text. See examples below." >
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
28 <sanitizer>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
29 <valid initial="string.printable">
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
30 <remove value="&apos;"/>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
31 </valid>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
32 </sanitizer>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
33 </param>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
34
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
35 <param name="is_regex" type="boolean" checked="false" truevalue="-r" falsevalue="" label="Find-Pattern is a regular expression"
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
36 help="see help section for details." />
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
37
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
38 <param name="caseinsensitive" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Case-Insensitive search"
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
39 help="" />
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
40
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
41 <param name="wholewords" type="boolean" checked="false" truevalue="-w" falsevalue="" label="find whole-words"
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
42 help="ignore partial matches (e.g. 'apple' will not match 'snapple') " />
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
43
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
44 <param name="skip_first_line" type="boolean" checked="false" truevalue="-s" falsevalue="" label="Ignore first line"
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
45 help="Select this option if the first line contains column headers. Text in the line will not be replaced. " />
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
46
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
47 <conditional name="searchwhere">
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
48 <param name="choice" type="select" label="Replace text in">
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
49 <option value="line" selected="true">entire line</option>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
50 <option value="column">specific column</option>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
51 </param>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
52 <when value="line" />
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
53
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
54 <when value="column">
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
55 <param name="column" label="in column" type="data_column" data_ref="input" accept_default="true" />
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
56 </when>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
57 </conditional>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
58 </inputs>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
59
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
60 <outputs>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
61 <data format="input" name="output" metadata_source="input" />
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
62 </outputs>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
63
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
64 <help>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
65
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
66 **What it does**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
67
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
68 This tool finds &amp; replaces text in an input dataset.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
69
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
70 .. class:: infomark
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
71
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
72 The **pattern to find** can be a simple text string, or a perl **regular expression** string (depending on the *Find-Pattern is a regular expression* check-box).
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
73
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
74 .. class:: infomark
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
75
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
76 When using regular expressions, the **replace pattern** can contain back-references ( e.g. \\1 )
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
77
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
78 .. class:: infomark
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
79
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
80 This tool uses Perl regular expression syntax.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
81
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
82 -----
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
83
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
84 **Examples of *regular-expression* Find Patterns**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
85
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
86 - **HELLO** The word 'HELLO' (case sensitive).
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
87 - **AG.T** The letters A,G followed by any single character, followed by the letter T.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
88 - **A{4,}** Four or more consecutive A's.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
89 - **chr2[012]\\t** The words 'chr20' or 'chr21' or 'chr22' followed by a tab character.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
90 - **hsa-mir-([^ ]+)** The text 'hsa-mir-' followed by one-or-more non-space characters. When using parentheses, the matched content of the parentheses can be accessed with **\\1** in the **replace** pattern.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
91
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
92
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
93 **Examples of Replace Patterns**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
94
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
95 - **WORLD** The word 'WORLD' will be placed wherever the find pattern was found.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
96 - **FOO-$&amp;-BAR** Each time the find pattern is found, it will be surrounded with 'FOO-' at the beginning and '-BAR' at the end. **$&amp;** (dollar-ampersand) represents the matched find pattern.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
97 - **$1** The text which matched the first parenthesis in the Find Pattern.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
98
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
99
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
100 -----
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
101
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
102 **Example 1**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
103
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
104 **Find Pattern:** HELLO
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
105 **Replace Pattern:** WORLD
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
106 **Regular Expression:** no
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
107 **Replace what:** entire line
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
108
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
109 Every time the word HELLO is found, it will be replaced with the word WORLD.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
110
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
111 -----
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
112
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
113 **Example 2**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
114
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
115 **Find Pattern:** ^chr
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
116 **Replace Pattern:** (empty)
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
117 **Regular Expression:** yes
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
118 **Replace what:** column 11
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
119
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
120 If column 11 (of every line) begins with the letters 'chr', they will be removed. Effectively, it'll turn "chr4" into "4" and "chrXHet" into "XHet"
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
121
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
122
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
123 -----
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
124
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
125 **Perl's Regular Expression Syntax**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
126
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
127 The Find &amp; Replace tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
128
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
129 - **( ) { } [ ] . * ? + \\ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
130 - **^** matches the beginning of a string (but not an internal line).
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
131 - **(** .. **)** groups a particular pattern.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
132 - **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
133
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
134 - **{n}** The preceding item is matched exactly n times.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
135 - **{n,}** The preceding item is matched n or more times.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
136 - **{n,m}** The preceding item is matched at least n times but not more than m times.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
137
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
138 - **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
139 - **.** Matches any single character except a newline.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
140 - ***** The preceding item will be matched zero or more times.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
141 - **?** The preceding item is optional and matched at most once.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
142 - **+** The preceding item will be matched one or more times.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
143 - **^** has two meanings:
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
144 - matches the beginning of a line or string.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
145 - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
146 - **$** matches the end of a line or string.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
147 - **\\|** Separates alternate possibilities.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
148 - **\\d** matches a single digit
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
149 - **\\w** matches a single letter or digit or an underscore.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
150 - **\\s** matches a single white-space (space or tabs).
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
151
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
152
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
153 </help>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
154
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
155 </tool>