5
|
1 <tool id="PHYLO_filter" name="Filter and trim" version="1.2.0">
|
2
|
2 <description>sequences</description>
|
6
|
<!-- Pulls in macro definitions from macros.xml. The basic_errors,
     cite_seqmagick and cite_biopython macros expanded in this file are
     presumably defined there (confirm against macros.xml). -->
<macros>
  <import>macros.xml</import>
</macros>
|
2
|
<!-- Single package dependency, yapp_env; no version is pinned here. -->
<requirements>
  <requirement type="package">yapp_env</requirement>
</requirements>
|
6
|
<!-- Error-detection rules are supplied entirely by the basic_errors macro. -->
<stdio>
  <expand macro="basic_errors"/>
</stdio>
|
2
|
<!-- Command used to report the version of seqmagick. -->
<version_command>seqmagick --version</version_command>
<!-- Runs filter-wrapper.sh under bash, passing the path of the rendered
     "config" configfile as its only argument. The path is single-quoted so
     it reaches the script as one argument even when the job directory path
     contains whitespace or shell metacharacters. No comment may be placed
     inside the command element itself, since its text is the command line. -->
<command interpreter="bash">
filter-wrapper.sh '${config}'
</command>
|
|
<inputs>
  <!-- TODO: can take either fasta+qual or fastq -->

  <!-- Plate and zone numbers; both are substituted into the plate_json
       configfile. -->
  <param name="plate_id" type="integer" value="1" label="Plate number"/>
  <param name="zone_id" type="integer" value="1" label="Zone number"/>

  <!-- Unfiltered sequences (FASTA) and the accompanying quality dataset. -->
  <param name="raw_seqs" type="data" format="fasta" label="Unfiltered sequences"/>
  <param name="input_qual" type="data" format="qual" label="Sequence quality data"/>

  <!-- TODO: handle MID format for multi-sample sequencing; see http://qiime.org/scripts/split_libraries.html -->
  <param name="barcodes" type="data" format="csv" label="Barcodes"/>
  <param name="primer"
         type="text"
         label="Primer"
         value="GCGGACTACCVGGGTATCTAAT"
         area="True"
         size="1x40"/>

  <!-- Filtering thresholds; the defaults match the values listed in the
       help table (350 and 35). -->
  <param name="min_length"
         type="integer"
         min="100"
         max="1000"
         value="350"
         label="Minimum sequence length"/>
  <param name="min_quality"
         type="integer"
         min="0"
         max="63"
         value="35"
         label="Minimum mean sequence quality"/>

  <!-- Rendered into the config file as the literal string TRUE or FALSE. -->
  <param name="reverse_complement"
         type="boolean"
         truevalue="TRUE"
         falsevalue="FALSE"
         label="Reads uniformly correspond to negative strands"/>
</inputs>
|
|
<!-- Output datasets. Each path is handed to the wrapper through the "config"
     configfile; filter_details is additionally referenced from plate_json,
     and seq_qual_report also exposes its files_path there as SQR_DIR. -->
<outputs>
  <data name="filtered_seqs" format="fasta" label="Filtered sequences"/>
  <data name="filter_report" format="tabular" label="Filtering report"/>
  <data name="filter_details" format="data" label="Filtering details"/>
  <data name="split_map" format="csv" label="Read-to-specimen map"/>
  <data name="seq_qual_report" format="html" label="Sequence quality report"/>
</outputs>
|
|
<!-- Two files are rendered per job:
       plate_json  a JSON description of the plate and zone, pointing at the
                   filter_details output;
       config      KEY="value" assignments for every input, every output and
                   the plate_json path; this is the file named on the command
                   line.
     Comments are kept outside the configfile elements so that the rendered
     file contents stay exactly as written. -->
<configfiles>
<configfile name="plate_json">
{
"plate": ${plate_id},
"name": "Plate ${plate_id}",
"zones": [
{
"zone": ${zone_id},
"cleaning_stats": "${filter_details}"
}
]
}
</configfile>
<configfile name="config">
RAW_SEQS="${raw_seqs}"
INPUT_QUAL="${input_qual}"
BARCODES="${barcodes}"
PRIMER="${primer}"
MIN_LENGTH="${min_length}"
MIN_QUALITY="${min_quality}"
REVERSE_COMPLEMENT="${reverse_complement}"
PLATE_JSON="${plate_json}"

FILTERED_SEQS="${filtered_seqs}"
FILTER_REPORT="${filter_report}"
FILTER_DETAILS="${filter_details}"
SPLIT_MAP="${split_map}"
SQR="${seq_qual_report}"
SQR_DIR="${seq_qual_report.files_path}"
</configfile>
</configfiles>
|
|
67 <!-- The contents of the help tag are parsed as reStructuredText. Please see
|
|
68 help-template.rst for examples of commonly-used sections in other Galaxy
|
|
69 tools. -->
|
|
70 <help>
|
|
71
|
|
72 .. class:: infomark
|
|
73
|
|
74 **What it does**
|
|
75
|
|
76 This tool truncates and removes sequences that don't match a set of quality
|
|
77 criteria, as well as mapping sequence barcodes to specimens. It takes input
|
|
78 sequences in FASTA format and a quality file, and outputs the filtered
|
5
|
79 sequences as well as a filtering summary and a sequence quality report.
|
2
|
80
|
|
81 The default quality filter settings are:
|
|
82
|
|
83 +---------------------------+------+
|
|
84 |parameter |value |
|
|
85 +===========================+======+
|
|
86 |--min-length |350 |
|
|
87 +---------------------------+------+
|
|
88 |--min-mean-quality |35 |
|
|
89 +---------------------------+------+
|
|
90 |--quality-window |30 |
|
|
91 +---------------------------+------+
|
|
92 |--quality-window-prop |0.9 |
|
|
93 +---------------------------+------+
|
|
94 |--quality-window-mean-qual |15 |
|
|
95 +---------------------------+------+
|
|
96
|
|
97 See seqmagick's `quality filter documentation`_ for full explanations of these
|
|
98 parameters.
|
|
99
|
|
100 .. _quality filter documentation: http://fhcrc.github.io/seqmagick/quality_filter.html
|
|
101
|
|
102 </help>
|
6
|
<!-- Citation entries are supplied by the cite_seqmagick and cite_biopython
     macros. -->
<citations>
  <expand macro="cite_seqmagick"/>
  <expand macro="cite_biopython"/>
</citations>
|
2
|
107 </tool>
|