annotate find_and_replace.xml @ 0:631dfde45073 draft default tip

First tool-shed public version
author gordon
date Tue, 09 Oct 2012 18:48:06 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
1 <tool id="cshl_find_and_replace" name="Find and Replace" version="0.1.1">
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
2 <description>text</description>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
3 <command interpreter="perl">
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
4 find_and_replace
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
5 #if $searchwhere.choice == "column":
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
6 -c $searchwhere.column
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
7 #end if
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
8 -o $output
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
9 $caseinsensitive
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
10 $wholewords
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
11 $skip_first_line
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
12 $is_regex
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
13 '$url_paste'
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
14 '$file_data'
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
15 '$input1'
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
16 </command>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
17 <inputs>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
18 <param format="txt" name="input1" type="data" label="File to process" />
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
19
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
20 <param name="url_paste" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without the surrounding slashes // ) " >
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
21 <sanitizer>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
22 <valid initial="string.printable">
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
23 <remove value="&apos;"/>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
24 </valid>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
25 </sanitizer>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
26 </param>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
27
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
28 <param name="file_data" type="text" size="20" label="Replace with" help="Use simple text, or $&amp; (dollar-ampersand) and $1 $2 $3 to refer to matched text. See examples below." >
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
29 <sanitizer>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
30 <valid initial="string.printable">
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
31 <remove value="&apos;"/>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
32 </valid>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
33 </sanitizer>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
34 </param>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
35
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
36 <param name="is_regex" type="boolean" checked="false" truevalue="-r" falsevalue="" label="Find-Pattern is a regular expression"
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
37 help="see help section for details." />
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
38
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
39 <param name="caseinsensitive" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Case-Insensitive search"
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
40 help="" />
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
41
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
42 <param name="wholewords" type="boolean" checked="false" truevalue="-w" falsevalue="" label="find whole-words"
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
43 help="ignore partial matches (e.g. 'apple' will not match 'snapple') " />
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
44
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
45 <param name="skip_first_line" type="boolean" checked="false" truevalue="-s" falsevalue="" label="Ignore first line"
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
46 help="Select this option if the first line contains column headers. Text in the line will not be replaced. " />
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
47
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
48 <conditional name="searchwhere">
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
49 <param name="choice" type="select" label="Replace text in">
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
50 <option value="line" selected="true">entire line</option>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
51 <option value="column">specific column</option>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
52 </param>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
53
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
54 <when value="line">
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
55 </when>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
56
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
57 <when value="column">
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
58 <param name="column" label="in column" type="data_column" data_ref="input1" accept_default="true" />
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
59 </when>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
60 </conditional>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
61 </inputs>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
62
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
63 <outputs>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
64 <data format="input" name="output" metadata_source="input1"
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
65 />
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
66 </outputs>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
67
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
68 <help>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
69
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
70 **What it does**
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
71
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
72 This tool finds &amp; replaces text in an input dataset.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
73
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
74 .. class:: infomark
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
75
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
76 The **pattern to find** can be a simple text string, or a perl **regular expression** string (depending on the *Find-Pattern is a regular expression* check-box).
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
77
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
78 .. class:: infomark
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
79
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
80 When using regular expressions, the **replace pattern** can contain back-references ( e.g. \\1 )
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
81
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
82 .. class:: infomark
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
83
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
84 This tool uses Perl regular expression syntax.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
85
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
86 -----
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
87
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
88 **Examples of *regular-expression* Find Patterns**
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
89
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
90 - **HELLO** The word 'HELLO' (case sensitive).
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
91 - **AG.T** The letters A,G followed by any single character, followed by the letter T.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
92 - **A{4,}** Four or more consecutive A's.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
93 - **chr2[012]\\t** The words 'chr20' or 'chr21' or 'chr22' followed by a tab character.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
94 - **hsa-mir-([^ ]+)** The text 'hsa-mir-' followed by one-or-more non-space characters. When using parentheses, the matched content of the parentheses can be accessed with **\\1** in the **replace** pattern.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
95
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
96
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
97 **Examples of Replace Patterns**
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
98
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
99 - **WORLD** The word 'WORLD' will be placed wherever the find pattern was found.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
100 - **FOO-$&amp;-BAR** Each time the find pattern is found, it will be surrounded with 'FOO-' at the beginning and '-BAR' at the end. **$&amp;** (dollar-ampersand) represents the matched find pattern.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
101 - **$1** The text which matched the first parenthesis in the Find Pattern.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
102
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
103
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
104 -----
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
105
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
106 **Example 1**
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
107
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
108 **Find Pattern:** HELLO
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
109 **Replace Pattern:** WORLD
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
110 **Regular Expression:** no
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
111 **Replace what:** entire line
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
112
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
113 Every time the word HELLO is found, it will be replaced with the word WORLD.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
114
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
115 -----
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
116
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
117 **Example 2**
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
118
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
119 **Find Pattern:** ^chr
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
120 **Replace Pattern:** (empty)
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
121 **Regular Expression:** yes
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
122 **Replace what:** column 11
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
123
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
124 If column 11 (of every line) begins with the letters 'chr', they will be removed. Effectively, it'll turn "chr4" into "4" and "chrXHet" into "XHet".
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
125
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
126
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
127 -----
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
128
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
129 **Perl's Regular Expression Syntax**
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
130
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
131 The Find &amp; Replace tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
132
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
133 - **( ) { } [ ] . * ? + \\ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
134 - **^** matches the beginning of a string (but not an internal line).
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
135 - **(** .. **)** groups a particular pattern.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
136 - **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
137
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
138 - **{n}** The preceding item is matched exactly n times.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
139 - **{n,}** The preceding item is matched n or more times.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
140 - **{n,m}** The preceding item is matched at least n times but not more than m times.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
141
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
142 - **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
143 - **.** Matches any single character except a newline.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
144 - ***** The preceding item will be matched zero or more times.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
145 - **?** The preceding item is optional and matched at most once.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
146 - **+** The preceding item will be matched one or more times.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
147 - **^** has two meanings:
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
148 - matches the beginning of a line or string.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
149 - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
150 - **$** matches the end of a line or string.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
151 - **|** Separates alternate possibilities.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
152 - **\\d** matches a single digit
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
153 - **\\w** matches a single letter or digit or an underscore.
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
154 - **\\s** matches a single whitespace character (such as a space or a tab).
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
155
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
156
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
157 </help>
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
158
631dfde45073 First tool-shed public version
gordon
parents:
diff changeset
159 </tool>