annotate find_and_replace.xml @ 18:1e974b82380d draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 0ba37c1f33eeb1c77b4d9363d681fe522d9f7fe7
author bgruening
date Tue, 27 Feb 2018 17:10:53 -0500
parents 74aae7d6cb09
children 1aa30b2c73c9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
15
74aae7d6cb09 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 3103ebed1a420c7d3415b67ef532ea579edf9faa
bgruening
parents: 14
diff changeset
1 <tool id="tp_find_and_replace" name="Replace" version="@BASE_VERSION@.1">
2
fc862d5bccaf Uploaded
bgruening
parents: 1
diff changeset
2 <description>parts of text</description>
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
3 <macros>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
4 <import>macros.xml</import>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
5 </macros>
13
3c685c4106b3 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
bgruening
parents: 10
diff changeset
6 <requirements>
15
74aae7d6cb09 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 3103ebed1a420c7d3415b67ef532ea579edf9faa
bgruening
parents: 14
diff changeset
7 <requirement type="package" version="5.22.0.1">perl</requirement>
13
3c685c4106b3 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
bgruening
parents: 10
diff changeset
8 </requirements>
3c685c4106b3 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
bgruening
parents: 10
diff changeset
9 <command>
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
10 <![CDATA[
13
3c685c4106b3 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b9d202134c3c6d0e5c398c3ae75e410067fcfc52
bgruening
parents: 10
diff changeset
11 perl '$__tool_directory__/find_and_replace'
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
12 #if $searchwhere.searchwhere_select == "column":
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
13 -c $searchwhere.column
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
14 #end if
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
15 -o $outfile
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
16 $caseinsensitive
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
17 $wholewords
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
18 $skip_first_line
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
19 $is_regex
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
20 '$find_pattern'
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
21 '$replace_pattern'
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
22 '$infile'
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
23 ]]>
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
24 </command>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
25 <inputs>
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
26 <param name="infile" format="txt" type="data" label="File to process" />
10
c78b1767db2b planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 10052765d6b712cf7d38356af4251fcc38a339b6-dirty
bgruening
parents: 7
diff changeset
27 <param name="find_pattern" type="text" label="Find pattern" help="Use simple text, or a valid regular expression (without the enclosing slashes // ) " >
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
28 <sanitizer>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
29 <valid initial="string.printable">
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
30 <remove value="&apos;"/>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
31 </valid>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
32 </sanitizer>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
33 </param>
10
c78b1767db2b planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 10052765d6b712cf7d38356af4251fcc38a339b6-dirty
bgruening
parents: 7
diff changeset
34 <param name="replace_pattern" type="text" label="Replace with"
6
8928e6d1e7ba Uploaded
bgruening
parents: 4
diff changeset
35 help="Use simple text, or $&amp; (dollar-ampersand) and $1 $2 $3 to refer to matched text. See examples below." >
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
36 <sanitizer>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
37 <valid initial="string.printable">
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
38 <remove value="&apos;"/>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
39 </valid>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
40 </sanitizer>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
41 </param>
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
42 <param name="is_regex" type="boolean" checked="false" truevalue="-r" falsevalue=""
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
43 label="Find-Pattern is a regular expression" help="see help section for details." />
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
44
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
45 <param name="caseinsensitive" type="boolean" checked="false" truevalue="-i" falsevalue=""
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
46 label="Case-Insensitive search" help="" />
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
47
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
48 <param name="wholewords" type="boolean" checked="false" truevalue="-w" falsevalue=""
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
49 label="Find whole-words" help="ignore partial matches (e.g. 'apple' will not match 'snapple')" />
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
50
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
51 <param name="skip_first_line" type="boolean" checked="false" truevalue="-s" falsevalue=""
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
52 label="Ignore first line" help="Select this option if the first line contains column headers. Text in the line will not be replaced. " />
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
53
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
54 <conditional name="searchwhere">
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
55 <param name="searchwhere_select" type="select" label="Find and Replace text in">
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
56 <option value="line" selected="true">entire line</option>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
57 <option value="column">specific column</option>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
58 </param>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
59 <when value="line" />
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
60 <when value="column">
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
61 <param name="column" label="in column" type="data_column" data_ref="infile" accept_default="true" />
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
62 </when>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
63 </conditional>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
64 </inputs>
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
65 <outputs>
6
8928e6d1e7ba Uploaded
bgruening
parents: 4
diff changeset
66 <data format_source="infile" name="outfile" metadata_source="infile" />
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
67 </outputs>
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
68 <tests>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
69 <test>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
70 <param name="infile" value="find_and_replace1.txt" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
71 <param name="find_pattern" value="day" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
72 <param name="replace_pattern" value="great day" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
73 <param name="is_regex" value="False" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
74 <param name="caseinsensitive" value="False" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
75 <param name="wholewords" value="True" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
76 <output name="outfile" file="find_and_replace_results1.txt" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
77 </test>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
78 <test>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
79 <param name="infile" value="find_and_replace2.txt" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
80 <param name="find_pattern" value="^chr" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
81 <param name="replace_pattern" value="" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
82 <param name="is_regex" value="True" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
83 <param name="caseinsensitive" value="False" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
84 <param name="wholewords" value="False" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
85 <param name="searchwhere_select" value="column" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
86 <param name="column" value="3" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
87 <output name="outfile" file="find_and_replace_results2.txt" />
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
88 </test>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
89 </tests>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
90 <help>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
91 <![CDATA[
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
92 **What it does**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
93
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
94 This tool finds and replaces text in an input dataset.
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
95
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
96 .. class:: infomark
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
97
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
98 The **pattern to find** can be a simple text string, or a perl **regular expression** string (depending on the *Find-Pattern is a regular expression* check-box).
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
99
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
100 .. class:: infomark
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
101
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
102 When using regular expressions, the **replace pattern** can contain back-references ( e.g. \\1 )
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
103
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
104 .. class:: infomark
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
105
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
106 This tool uses Perl regular expression syntax.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
107
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
108 -----
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
109
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
110 **Examples of *regular-expression* Find Patterns**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
111
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
112 - **HELLO** The word 'HELLO' (case sensitive).
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
113 - **AG.T** The letters A,G followed by any single character, followed by the letter T.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
114 - **A{4,}** Four or more consecutive A's.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
115 - **chr2[012]\\t** The words 'chr20' or 'chr21' or 'chr22' followed by a tab character.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
116 - **hsa-mir-([^ ]+)** The text 'hsa-mir-' followed by one-or-more non-space characters. When using parentheses, the matched content of the parentheses can be accessed with **\\1** in the **replace** pattern.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
117
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
118
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
119 **Examples of Replace Patterns**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
120
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
121 - **WORLD** The word 'WORLD' will be placed wherever the find pattern was found.
6
8928e6d1e7ba Uploaded
bgruening
parents: 4
diff changeset
122 - **FOO-$&-BAR** Each time the find pattern is found, it will be surrounded with 'FOO-' at the beginning and '-BAR' at the end. **$&** (dollar-ampersand) represents the matched find pattern.
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
123 - **$1** The text which matched the first parenthesis in the Find Pattern.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
124
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
125
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
126 -----
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
127
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
128 **Example 1**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
129
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
130 **Find Pattern:** HELLO
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
131 **Replace Pattern:** WORLD
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
132 **Regular Expression:** no
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
133 **Replace what:** entire line
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
134
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
135 Every time the word HELLO is found, it will be replaced with the word WORLD.
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
136
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
137 -----
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
138
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
139 **Example 2**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
140
7
d64eace4f9f3 Uploaded
bgruening
parents: 6
diff changeset
141 **Find Pattern:** ^chr
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
142 **Replace Pattern:** (empty)
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
143 **Regular Expression:** yes
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
144 **Replace what:** column 11
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
145
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
146 If column 11 (of every line) begins with the letters 'chr', they will be removed. Effectively, it'll turn "chr4" into "4" and "chrXHet" into "XHet".
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
147
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
148
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
149 -----
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
150
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
151 **Perl's Regular Expression Syntax**
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
152
7
d64eace4f9f3 Uploaded
bgruening
parents: 6
diff changeset
153 The Find & Replace tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text.
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
154
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
155 - **( ) { } [ ] . * ? + \\ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
156 - **^** matches the beginning of a string (but not an internal line).
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
157 - **(** .. **)** groups a particular pattern.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
158 - **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
159
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
160 - **{n}** The preceding item is matched exactly n times.
7
d64eace4f9f3 Uploaded
bgruening
parents: 6
diff changeset
161 - **{n,}** The preceding item is matched n or more times.
d64eace4f9f3 Uploaded
bgruening
parents: 6
diff changeset
162 - **{n,m}** The preceding item is matched at least n times but not more than m times.
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
163
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
164 - **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
165 - **.** Matches any single character except a newline.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
166 - ***** The preceding item will be matched zero or more times.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
167 - **?** The preceding item is optional and matched at most once.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
168 - **+** The preceding item will be matched one or more times.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
169 - **^** has two meanings:
7
d64eace4f9f3 Uploaded
bgruening
parents: 6
diff changeset
170 - matches the beginning of a line or string.
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
171 - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
172 - **$** matches the end of a line or string.
7
d64eace4f9f3 Uploaded
bgruening
parents: 6
diff changeset
173 - **\\|** Separates alternate possibilities.
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
174 - **\\d** matches a single digit
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
175 - **\\w** matches a single letter or digit or an underscore.
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
176 - **\\s** matches a single white-space character (e.g. a space or a tab).
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
177
4
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
178 @REFERENCES@
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
179 ]]>
56e80527c482 Uploaded
bgruening
parents: 3
diff changeset
180 </help>
14
7725ab6dab67 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit b'e6ee273f75fff61d1e419283fa8088528cf59470\n'
bgruening
parents: 13
diff changeset
181 <expand macro="citations" />
0
ec66f9d90ef0 initial uploaded
bgruening
parents:
diff changeset
182 </tool>