annotate find_and_replace.xml @ 5:3f0e0d4c15a9 draft

Uploaded
author bgruening
date Wed, 07 Jan 2015 11:15:41 -0500
parents 56e80527c482
children 8928e6d1e7ba
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
1 <tool id="tp_find_and_replace" name="Replace" version="@BASE_VERSION@.0">
2
fc862d5bccaf Uploaded
bgruening
parents: 1
diff changeset
2 <description>parts of text</description>
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
3 <macros>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
4 <import>macros.xml</import>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
5 </macros>
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
6 <command interpreter="perl">
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
7 <![CDATA[
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
8 find_and_replace
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
9 #if $searchwhere.searchwhere_select == "column":
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
10 -c $searchwhere.column
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
11 #end if
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
12 -o $outfile
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
13 $caseinsensitive
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
14 $wholewords
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
15 $skip_first_line
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
16 $is_regex
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
17 '$find_pattern'
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
18 '$replace_pattern'
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
19 '$infile'
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
20 ]]>
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
21 </command>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
22 <inputs>
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
23 <param name="infile" format="txt" type="data" label="File to process" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
24 <param name="find_pattern" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without the enclosing slashes / / ) " >
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
25 <sanitizer>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
26 <valid initial="string.printable">
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
27 <remove value="&apos;"/>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
28 </valid>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
29 </sanitizer>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
30 </param>
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
31 <param name="replace_pattern" type="text" size="20" label="Replace with" help="Use simple text, or $&amp; (dollar-ampersand) and $1 $2 $3 to refer to matched text. See examples below." >
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
32 <sanitizer>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
33 <valid initial="string.printable">
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
34 <remove value="&apos;"/>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
35 </valid>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
36 </sanitizer>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
37 </param>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
38
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
39 <param name="is_regex" type="boolean" checked="false" truevalue="-r" falsevalue=""
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
40 label="Find-Pattern is a regular expression" help="see help section for details." />
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
41
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
42 <param name="caseinsensitive" type="boolean" checked="false" truevalue="-i" falsevalue=""
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
43 label="Case-Insensitive search" help="" />
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
44
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
45 <param name="wholewords" type="boolean" checked="false" truevalue="-w" falsevalue=""
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
46 label="Find whole-words" help="ignore partial matches (e.g. 'apple' will not match 'snapple')" />
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
47
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
48 <param name="skip_first_line" type="boolean" checked="false" truevalue="-s" falsevalue=""
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
49 label="Ignore first line" help="Select this option if the first line contains column headers. Text in the line will not be replaced. " />
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
50
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
51 <conditional name="searchwhere">
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
52 <param name="searchwhere_select" type="select" label="Find and Replace text in">
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
53 <option value="line" selected="true">entire line</option>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
54 <option value="column">specific column</option>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
55 </param>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
56 <when value="line" />
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
57 <when value="column">
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
58 <param name="column" label="in column" type="data_column" data_ref="infile" accept_default="true" />
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
59 </when>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
60 </conditional>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
61 </inputs>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
62 <outputs>
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
63 <data format="input" name="outfile" metadata_source="infile" />
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
64 </outputs>
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
65 <tests>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
66 <test>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
67 <param name="infile" value="find_and_replace1.txt" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
68 <param name="find_pattern" value="day" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
69 <param name="replace_pattern" value="great day" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
70 <param name="is_regex" value="False" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
71 <param name="caseinsensitive" value="False" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
72 <param name="wholewords" value="True" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
73 <output name="outfile" file="find_and_replace_results1.txt" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
74 </test>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
75 <test>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
76 <param name="infile" value="find_and_replace2.txt" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
77 <param name="find_pattern" value="^chr" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
78 <param name="replace_pattern" value="" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
79 <param name="is_regex" value="True" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
80 <param name="caseinsensitive" value="False" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
81 <param name="wholewords" value="False" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
82 <param name="searchwhere_select" value="column" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
83 <param name="column" value="3" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
84 <output name="outfile" file="find_and_replace_results2.txt" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
85 </test>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
86 </tests>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
87 <help>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
88 <![CDATA[
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
89 **What it does**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
90
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
91 This tool finds and replaces text in an input dataset.
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
92
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
93 .. class:: infomark
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
94
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
95 The **pattern to find** can be a simple text string, or a perl **regular expression** string (depending on *pattern is a regex* check-box).
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
96
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
97 .. class:: infomark
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
98
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
99 When using regular expressions, the **replace pattern** can contain back-references ( e.g. \\1 )
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
100
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
101 .. class:: infomark
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
102
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
103 This tool uses Perl regular expression syntax.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
104
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
105 -----
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
106
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
107 **Examples of *regular-expression* Find Patterns**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
108
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
109 - **HELLO** The word 'HELLO' (case sensitive).
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
110 - **AG.T** The letters A,G followed by any single character, followed by the letter T.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
111 - **A{4,}** Four or more consecutive A's.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
112 - **chr2[012]\\t** The words 'chr20' or 'chr21' or 'chr22' followed by a tab character.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
113 - **hsa-mir-([^ ]+)** The text 'hsa-mir-' followed by one-or-more non-space characters. When using parentheses, the matched content of the parentheses can be accessed with **\\1** in the **replace** pattern.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
114
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
115
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
116 **Examples of Replace Patterns**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
117
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
118 - **WORLD** The word 'WORLD' will be placed wherever the find pattern was found.
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
119 - **FOO-$&-BAR** Each time the find pattern is found, it will be surrounded with 'FOO-' at the beginning and '-BAR' at the end. **$&** (dollar-ampersand) represents the matched find pattern.
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
120 - **$1** The text which matched the first parenthesis in the Find Pattern.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
121
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
122
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
123 -----
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
124
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
125 **Example 1**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
126
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
127 **Find Pattern:** HELLO
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
128 **Replace Pattern:** WORLD
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
129 **Regular Expression:** no
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
130 **Replace what:** entire line
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
131
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
132 Every time the word HELLO is found, it will be replaced with the word WORLD.
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
133
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
134 -----
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
135
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
136 **Example 2**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
137
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
138 **Find Pattern:** ^chr
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
139 **Replace Pattern:** (empty)
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
140 **Regular Expression:** yes
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
141 **Replace what:** column 11
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
142
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
143 If column 11 (of every line) begins with the letters 'chr', they will be removed. Effectively, it'll turn "chr4" into "4" and "chrXHet" into "XHet".
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
144
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
145
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
146 -----
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
147
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
148 **Perl's Regular Expression Syntax**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
149
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
150 The Find & Replace tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text.
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
151
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
152 - **( ) { } [ ] . * ? + \\ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
153 - **^** matches the beginning of a string (but not an internal line).
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
154 - **(** .. **)** groups a particular pattern.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
155 - **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
156
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
157 - **{n}** The preceding item is matched exactly n times.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
158 - **{n,}** The preceding item is matched n or more times.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
159 - **{n,m}** The preceding item is matched at least n times but not more than m times.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
160
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
161 - **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
162 - **.** Matches any single character except a newline.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
163 - ***** The preceding item will be matched zero or more times.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
164 - **?** The preceding item is optional and matched at most once.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
165 - **+** The preceding item will be matched one or more times.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
166 - **^** has two meanings:
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
167 - matches the beginning of a line or string.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
168 - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
169 - **$** matches the end of a line or string.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
170 - **|** Separates alternate possibilities.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
171 - **\\d** matches a single digit
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
172 - **\\w** matches a single letter or digit or an underscore.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
173 - **\\s** matches a single white-space (space or tabs).
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
174
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
175 @REFERENCES@
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
176 ]]>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
177 </help>
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
178 </tool>