annotate convert.xml @ 3:d6ec32ce882b draft default tip

Uploaded
author wolma
date Tue, 28 Mar 2017 04:34:04 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
1 <tool id="convert" name="Convert" version="0.1.7.3">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
2 <description>between different sequence data formats</description>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
3 <macros>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
4 <import>toolshed_macros.xml</import>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
5 </macros>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
6 <expand macro="requirements" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
7 <version_command>mimodd version -q</version_command>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
8 <command>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
9 #if $str($mode.split_on_rgs) or $str($mode.oformat)=="fastq" or $str($mode.oformat)=="gz":
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
10 echo "Your input data is now getting processed by MiModD. The output will be split into several files based on the read groups found in the input.\nThis history item will remain in the busy state until the job is finished.\nAfter the job is showing as finished, Galaxy will start adding the results files to your history one by one.\n\nThis may take a while to complete! \n\nYou should refresh your history to see if new files have arrived.\n\nThis message is for your information only and can be deleted from the history once the job has finished." &gt; $output_split_on_read_groups;
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
11
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
12 mkdir converted_data;
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
13 #end if
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
14
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
15 mimodd convert
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
16
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
17 #for $i in $mode.input_list
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
18 "${i.file1}"
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
19 #if $str($mode.iformat) in ("fastq_pe", "gz_pe"):
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
20 "${i.file2}"
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
21 #end if
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
22 #end for
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
23 #if $str($mode.header) != "None":
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
24 --header "$(mode.header)"
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
25 #end if
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
26
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
27 #if $str($outputname) == "None":
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
28 --ofile converted_data/read_group
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
29 #else
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
30 --ofile "$outputname"
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
31 #end if
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
32 --iformat $(mode.iformat)
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
33 --oformat $(mode.oformat)
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
34 ${mode.split_on_rgs}
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
35 </command>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
36
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
37 <inputs>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
38 <conditional name="mode">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
39 <param help="Your choice will update the interface to display further choices appropriate for your type of input data." label="input file format" name="iformat" type="select">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
40 <option value="fastq">fastq: single-end (one file)</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
41 <option value="fastq_pe">fastq: paired-end (two files)</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
42 <option value="gz">gzip compressed fastq: single-end (one file)</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
43 <option value="gz_pe">gzip compressed fastq: paired-end (two files)</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
44 <option value="sam">sam</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
45 <option value="bam">bam</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
46 </param>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
47 <when value="fastq">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
48 <param label="output file format" name="oformat" type="select">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
49 <option value="sam">sam</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
50 <option value="bam">bam</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
51 </param>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
52 <repeat default="1" min="1" name="input_list" title="fastq input dataset">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
53 <param format="fastq" label="inputfile" name="file1" type="data" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
54 </repeat>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
55 <param format="sam" help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file." label="Use Header File" name="header" type="data" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
56 <param name="split_on_rgs" type="hidden" value="" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
57 </when>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
58 <when value="fastq_pe">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
59 <param label="output file format" name="oformat" type="select">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
60 <option value="sam">sam</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
61 <option value="bam">bam</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
62 </param>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
63 <repeat default="1" min="1" name="input_list" title="fastq input datasets">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
64 <param format="fastq" label="inputfile with the first set of reads of paired-end data" name="file1" type="data" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
65 <param format="fastq" label="inputfile with the second set of reads of paired-end data" name="file2" type="data" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
66 </repeat>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
67 <param format="sam" help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file." label="Use Header File" name="header" type="data" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
68 <param name="split_on_rgs" type="hidden" value="" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
69 </when>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
70 <when value="gz">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
71 <param label="output file format" name="oformat" type="select">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
72 <option value="sam">sam</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
73 <option value="bam">bam</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
74 </param>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
75 <repeat default="1" min="1" name="input_list" title="fastq.gz input dataset">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
76 <param format="data" label="inputfile" name="file1" type="data" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
77 </repeat>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
78 <param format="sam" help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file." label="Use Header File" name="header" type="data" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
79 <param name="split_on_rgs" type="hidden" value="" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
80 </when>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
81 <when value="gz_pe">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
82 <param label="output file format" name="oformat" type="select">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
83 <option value="sam">sam</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
84 <option value="bam">bam</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
85 </param>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
86 <repeat default="1" min="1" name="input_list" title="fastq.gz input datasets">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
87 <param format="data" label="inputfile with the first set of reads of paired-end data" name="file1" type="data" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
88 <param format="data" label="inputfile with the second set of reads of paired-end data" name="file2" type="data" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
89 </repeat>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
90 <param format="sam" help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file." label="Use Header File" name="header" type="data" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
91 <param name="split_on_rgs" type="hidden" value="" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
92 </when>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
93 <when value="sam">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
94 <param label="output file format" name="oformat" type="select">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
95 <option value="bam">bam</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
96 <option value="sam">sam</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
97 <option value="fastq">fastq</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
98 <option value="gz">gzipped fastq</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
99 </param>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
100 <repeat default="1" max="1" min="1" name="input_list" title="sam input dataset">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
101 <param format="sam" label="inputfile" name="file1" type="data" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
102 </repeat>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
103 <param name="header" type="hidden" value="None" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
104 <param checked="false" falsevalue="" help="If the input file contains reads from different read groups, write them to separate output files; implied automatically for conversions to fastq and gzipped fastq format" label="Split output based on read group IDs" name="split_on_rgs" truevalue="--split-on-rgs" type="boolean" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
105 </when>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
106 <when value="bam">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
107 <param label="output file format" name="oformat" type="select">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
108 <option value="sam">sam</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
109 <option value="bam">bam</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
110 <option value="fastq">fastq</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
111 <option value="gz">gzipped fastq</option>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
112 </param>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
113 <repeat default="1" max="1" min="1" name="input_list" title="bam input dataset">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
114 <param format="bam" label="inputfile" name="file1" type="data" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
115 </repeat>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
116 <param name="header" type="hidden" value="None" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
117 <param checked="false" falsevalue="" help="If the input file contains reads from different read groups, write them to separate output files; implied automatically for conversions to fastq and gzipped fastq format" label="Split output based on read group IDs" name="split_on_rgs" truevalue="--split-on-rgs" type="boolean" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
118 </when>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
119 </conditional>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
120 </inputs>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
121
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
122 <outputs>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
123 <data format="bam" label="Converted reads from MiModd ${tool.name} on ${on_string}" name="outputname">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
124 <change_format>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
125 <when format="sam" input="mode.oformat" value="sam" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
126 </change_format>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
127 <filter>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
128 (not mode['split_on_rgs'] and mode['oformat'] not in ("fastq", "gz"))
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
129 </filter>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
130 </data>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
131
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
132 <data format="txt" label="MiModD ${tool.name} run on ${on_string}" name="output_split_on_read_groups">
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
133 <filter>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
134 (mode['split_on_rgs'] or mode['oformat'] in ("fastq", "gz"))
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
135 </filter>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
136 <discover_datasets directory="converted_data" pattern="__designation_and_ext__" visible="true" />
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
137 </data>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
138 </outputs>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
139
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
140 <help>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
141 .. class:: infomark
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
142
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
143 **What it does**
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
144
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
145 The tool converts between different file formats used for storing next-generation sequencing data.
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
146
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
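147 As input it accepts uncompressed or gzipped fastq, SAM, or BAM files. Fastq input (uncompressed or gzipped) can be converted to SAM or BAM format; SAM and BAM input can be converted to SAM, BAM, fastq, or gzipped fastq format.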
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
148
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
149 **Notes:**
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
150
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
151 1) In its standard configuration, Galaxy will decompress any .gz files during their upload, so the option to convert gzipped fastq input is useful only with customized Galaxy instances or when using linked files, as explained in our `recipe for using gzipped fastq files in Galaxy`_ from the `MiModD user guide`_.
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
152
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
153 2) The tool can convert fastq files representing data from paired-end sequencing runs to appropriate SAM/BAM format provided that the mate information is split over two fastq files in corresponding order.
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
154
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
155 **TIP:** If your paired-end data is arranged differently, you may look into the *fastq splitter* and *fastq de-interlacer* tools for Galaxy from the `Fastq Manipulation category`_ of the Galaxy Tool Shed to see if they can convert your files to the expected format.
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
156
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
157 3) Merging partial fastq (or gzipped fastq) files into a single SAM/BAM file is supported for both single-end and paired-end data. Simply add additional input datasets and select the appropriate files (pairs of files in the case of paired-end data).
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
158
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
159 Concatenation of SAM/BAM files during conversion is currently not supported.
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
160
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
161 4) For input in fastq format (uncompressed or gzipped), a SAM header file providing run metadata **has to be specified**. The information in this file will be used as the header data of the new SAM/BAM file. You can use the *NGS Run Annotation* tool to generate a new header file for your data.
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
162
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
163 For input in SAM/BAM format the tool will simply copy the existing header data to the new file. To modify the header of an existing SAM/BAM file, use the *Reheader BAM file* tool instead.
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
164
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
165 .. _Fastq Manipulation category: https://toolshed.g2.bx.psu.edu/repository/browse_repositories_in_category?id=310ff67d4caf6531
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
166 .. _recipe for using gzipped fastq files in Galaxy: http://mimodd.readthedocs.org/en/latest/recipes.html#use-gzipped-fastq-files-in-galaxy
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
167 .. _MiModD user guide: http://mimodd.readthedocs.org/en/latest
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
168
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
169 </help>
d6ec32ce882b Uploaded
wolma
parents:
diff changeset
170 </tool>