Mercurial > repos > bgruening > prinseq
comparison prinseq.xml @ 0:9790cfb46d03 draft default tip
Uploaded
author | bgruening |
---|---|
date | Mon, 07 Oct 2013 15:34:32 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9790cfb46d03 |
---|---|
1 <tool id="prinseq_trimmer" name="FASTQ trimmer" version="0.1"> | |
2 <description>(prinseq)</description> | |
3 <version_command interpreter="perl">prinseq-lite.pl --version</version_command> | |
4 <requirements> | |
5 <requirement type="package" version="0.20.3">prinseq_perl_dependencies</requirement> | |
6 <requirement type="set_environment">PRINSEQ_SCRIPT_PATH</requirement> | |
7 </requirements> | |
8 <command> | |
9 #import os | |
10 temp_graph_file=`mktemp`; | |
11 ## temp_graph_file is a shell variable (no spaces around '='); it is referenced below with an escaped dollar sign so Cheetah leaves it for the shell | |
12 perl \$PRINSEQ_SCRIPT_PATH/prinseq-lite.pl | |
13 #if $seq_type.seq_type_opt == 'single': | |
14 -fastq $seq_type.input_singles | |
15 #if $seq_type.input_singles.ext == 'fastqillumina': | |
16 -phred64 | |
17 #end if | |
18 #else: | |
19 -fastq $seq_type.input_mate1 | |
20 -fastq2 $seq_type.input_mate2 | |
21 #if $seq_type.input_mate1.ext != $seq_type.input_mate2.ext: | |
22 #import sys | |
23 #silent sys.stderr.write( 'Both pairs from your paired-end library need to be from the same filetype.' ) | |
24 #end if | |
25 #if $seq_type.input_mate1.ext == 'fastqillumina': | |
26 -phred64 | |
27 #end if | |
28 #end if | |
29 | |
30 -out_good 'trimmed_reads' | |
31 ## we do not use the filter options in prinseq, so we are not interested in reads | |
32 ## that do not pass the filters | |
33 -out_bad null | |
34 | |
35 ## Trim options: each flag is only emitted when the corresponding input has a value | |
36 #if $trim_to_len: | |
37 -trim_to_len $trim_to_len | |
38 #end if | |
39 | |
40 #if $trim_left: | |
41 -trim_left $trim_left | |
42 #end if | |
43 #if $trim_left_p# -trim_left_p $trim_left_p #end if# | |
44 #if $trim_right: | |
45 -trim_right $trim_right | |
46 #end if | |
47 #if $trim_right_p# -trim_right_p $trim_right_p #end if# | |
48 #if $trim_qual_left or $trim_qual_right: | |
49 -trim_qual_type $trim_qual_type | |
50 -trim_qual_rule $trim_qual_rule | |
51 -trim_qual_window $trim_qual_window | |
52 -trim_qual_step $trim_qual_step | |
53 #end if | |
54 #if $trim_tail_left# -trim_tail_left $trim_tail_left #end if# | |
55 #if $trim_qual_left: | |
56 -trim_qual_left $trim_qual_left | |
57 #end if | |
58 #if $trim_tail_right# -trim_tail_right $trim_tail_right #end if# | |
59 #if $trim_qual_right: | |
60 -trim_qual_right $trim_qual_right | |
61 #end if | |
62 #if $trim_ns_left# -trim_ns_left $trim_ns_left #end if# | |
63 #if $trim_ns_right# -trim_ns_right $trim_ns_right #end if# | |
64 -graph_stats #echo ','.join( $graph_stats )# | |
65 | |
66 ## summary are written to stdout | |
67 -stats_all | |
68 | |
69 | |
70 -graph_data \$temp_graph_file | |
71 | |
72 ; | |
73 | |
74 perl \$PRINSEQ_SCRIPT_PATH/prinseq-graphs-noPCA.pl -i \$temp_graph_file -html_all -o #echo os.path.join( $html_file.files_path, 'graphs' )# | |
75 | |
76 ; | |
77 | |
78 python \$PRINSEQ_SCRIPT_PATH/create_index.py $html_file.files_path > $html_file | |
79 | |
80 | |
81 </command> | |
82 <inputs> | |
83 <conditional name="seq_type"> | |
84 <param name="seq_type_opt" type="select" label="Is this library paired- or single-end?"> | |
85 <option value="single">Single-end</option> | |
86 <option value="paired">Paired-end</option> | |
87 </param> | |
88 <when value="single"> | |
89 <param name="input_singles" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." /> | |
90 </when> | |
91 <when value="paired"> | |
92 <param name="input_mate1" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." /> | |
93 <param name="input_mate2" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." /> | |
94 </when> | |
95 </conditional> | |
96 | |
97 <param name="trim_to_len" type="integer" value="" | |
98 label="Trim all sequence from the 3'-end to result in sequence with this length" | |
99 help="(-trim_to_len)"/> | |
100 | |
101 <param name="trim_left" type="integer" value="" | |
102 label="Trim sequence at the 5'-end by trim_left positions" | |
103 help="(-trim_left)"/> | |
104 | |
105 <param name="trim_right" type="integer" value="" | |
106 label="Trim sequence at the 3'-end by trim_right positions" | |
107 help="(-trim_right)"/> | |
108 | |
109 <param name="trim_left_p" type="integer" value="" | |
110 label="Trim sequence at the 5'-end by trim_left_p percentage of read length." | |
111 help="The trim length is rounded towards the lower integer (e.g. 143.6 is rounded to 143 positions). Use an integer between 1 and 100 for the percentage value. (-trim_left_p)"/> | |
112 | |
113 <param name="trim_right_p" type="integer" value="" | |
114 label="Trim sequence at the 3'-end by trim_right_p percentage of read length" | |
115 help="The trim length is rounded towards the lower integer (e.g. 143.6 is rounded to 143 positions). Use an integer between 1 and 100 for the percentage value. (-trim_right_p)"/> | |
116 | |
117 <param name="trim_tail_left" type="integer" value="" | |
118 label="Trim poly-A/T tail with a minimum length of trim_tail_left at the 5'-end" | |
119 help="(-trim_tail_left)"/> | |
120 | |
121 <param name="trim_tail_right" type="integer" value="" | |
122 label="Trim poly-A/T tail with a minimum length of trim_tail_right at the 3'-end" | |
123 help="(-trim_tail_right)"/> | |
124 | |
125 <param name="trim_ns_left" type="integer" value="" | |
126 label="Trim poly-N tail with a minimum length of trim_ns_left at the 5'-end" | |
127 help="(-trim_ns_left)"/> | |
128 | |
129 <param name="trim_ns_right" type="integer" value="" | |
130 label="Trim poly-N tail with a minimum length of trim_ns_right at the 3'-end." | |
131 help="(-trim_ns_right)"/> | |
132 | |
133 | |
134 <param name="trim_qual_left" type="integer" value="" | |
135 label=" Trim sequence by quality score from the 5'-end with this threshold score" | |
136 help="(-trim_qual_left)"/> | |
137 | |
138 <param name="trim_qual_right" type="integer" value="" | |
139 label="Trim sequence by quality score from the 3'-end with this threshold score" | |
140 help="(-trim_qual_right)"/> | |
141 | |
142 <param name="trim_qual_type" type="select" label="Type of quality score calculation to use"> | |
143 <option value="min" selected="True">min</option> | |
144 <option value="mean">mean</option> | |
145 <option value="max">max</option> | |
146 <option value="sum">sum</option> | |
147 </param> | |
148 | |
149 <param name="trim_qual_rule" type="select" label="Rule to use to compare quality score to calculated value."> | |
150 <option value="gt">greater than quality score</option> | |
151 <option value="lt" selected="True">less than quality score</option> | |
152 <option value="et">equal to quality score</option> | |
153 </param> | |
154 | |
155 <param name="trim_qual_window" type="integer" value="1" | |
156 label="The sliding window size used to calculate quality score by type" | |
157 help="(-trim_qual_window)"/> | |
158 | |
159 <param name="trim_qual_step" type="integer" value="1" | |
160 label="Step size used to move the sliding window" | |
161 help="To move the window over all quality scores without missing any, the step size should be less or equal to the window size(-trim_qual_step)"/> | |
162 | |
163 <param name="graph_stats" type="select" multiple="True" label="Which statistics should be calculated and included in the graph_data file"> | |
164 <option value="ld" selected="True">Length distribution</option> | |
165 <option value="gc" selected="True">GC content distribution</option> | |
166 <option value="qd" selected="True">Base quality distribution</option> | |
167 <option value="ns" selected="True">Occurrence of N</option> | |
168 <option value="pt" selected="True">Poly-A/T tails</option> | |
169 <option value="ts" selected="True">Tag sequence check</option> | |
170 <option value="as" selected="True">Assembly quality measure</option> | |
171 <option value="de" selected="True">Sequence duplication - exact only</option> | |
172 <option value="da" selected="True">Sequence duplication - exact + 5'/3'</option> | |
173 <option value="sc" selected="True">Sequence complexity</option> | |
174 <option value="dn" selected="True">Dinucleotide odds ratios, includes the PCA plots</option> | |
175 </param> | |
176 | |
177 | |
178 <!-- TODO | |
179 -log <file> | |
180 Log file to keep track of parameters, errors, etc. The log file | |
181 name is optional. If no file name is given, the log file name | |
182 will be "inputname.log". If the log file already exists, new | |
183 content will be added to the file. | |
184 --> | |
185 | |
186 </inputs> | |
187 <outputs> | |
188 <data format="fastq" name="ofile_single" metadata_source="seq_type.input_singles" label="${tool.name} on ${on_string}"> | |
189 <filter>seq_type['seq_type_opt'] == "single"</filter> | |
190 </data> | |
191 | |
192 <data format="fastq" name="outfile_r1" label="${tool.name} on ${on_string}"> | |
193 <filter>seq_type['seq_type_opt'] == "paired"</filter> | |
194 <actions> | |
195 <conditional name="seq_type.seq_type_opt"> | |
196 <when value="single"> | |
197 <action type="format"> | |
198 <option type="from_param" name="seq_type.input_singles" param_attribute="ext" /> | |
199 </action> | |
200 </when> | |
201 <when value="paired"> | |
202 <action type="format"> | |
203 <option type="from_param" name="seq_type.input_mate1" param_attribute="ext" /> | |
204 </action> | |
205 </when> | |
206 </conditional> | |
207 </actions> | |
208 </data> | |
209 <data format="fastq" name="outfile_r2" label="${tool.name} on ${on_string}"> | |
210 <filter>seq_type['seq_type_opt'] == "paired"</filter> | |
211 <actions> | |
212 <conditional name="seq_type.seq_type_opt"> | |
213 <when value="single"> | |
214 <action type="format"> | |
215 <option type="from_param" name="seq_type.input_singles" param_attribute="ext" /> | |
216 </action> | |
217 </when> | |
218 <when value="paired"> | |
219 <action type="format"> | |
220 <option type="from_param" name="seq_type.input_mate1" param_attribute="ext" /> | |
221 </action> | |
222 </when> | |
223 </conditional> | |
224 </actions> | |
225 </data> | |
226 | |
227 <data format="html" name="html_file" label="${tool.name} on ${on_string} summary" /> | |
228 </outputs> | |
229 <tests> | |
230 <test> | |
231 <!-- trim 10 positions from the 3'-end of a single-end FASTQ file --> | |
232 <param name="seq_type.input_singles" value="example1.fastq" /> | |
233 <output name="ofile_single" file="example1_trim_right_10.fastq" /> | |
234 <param name="trim_right" value="10" /> | |
235 </test> | |
236 </tests> | |
237 <help> | |
238 | |
239 | |
240 .. class:: warningmark | |
241 | |
242 **TIP** | |
243 | |
244 ----- | |
245 | |
246 **What it does** | |
247 | |
248 | |
249 PRINSEQ is a tool that generates summary statistics of sequence and quality data and that is used to filter, reformat and trim next-generation sequence data. | |
250 | |
251 | |
252 http://prinseq.sourceforge.net/manual.html | |
253 | |
254 | |
255 **Order of processing** | |
256 The available options are processed in the following order: | |
257 | |
258 seq_num, trim_left, trim_right, trim_left_p, trim_right_p, | |
259 trim_qual_left, trim_qual_right, trim_tail_left, | |
260 trim_tail_right, trim_ns_left, trim_ns_right, trim_to_len, | |
261 min_len, max_len, range_len, min_qual_score, max_qual_score, | |
262 min_qual_mean, max_qual_mean, min_gc, max_gc, range_gc, | |
263 ns_max_p, ns_max_n, noniupac, lc_method, derep, seq_id, | |
264 seq_case, dna_rna, out_format | |
265 | |
266 | |
267 | |
268 | |
269 </help> | |
270 </tool> |