comparison convert.xml @ 3:d6ec32ce882b draft default tip

Uploaded
author wolma
date Tue, 28 Mar 2017 04:34:04 -0400
parents
children
comparison
equal deleted inserted replaced
2:7f7028112439 3:d6ec32ce882b
<tool id="convert" name="Convert" version="0.1.7.3">
    <description>between different sequence data formats</description>
    <macros>
        <import>toolshed_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <version_command>mimodd version -q</version_command>

    <!--
        Cheetah template that assembles the "mimodd convert" command line.
        The body is wrapped in CDATA so that the shell redirect can be written
        literally instead of as an XML entity.
    -->
    <command><![CDATA[
        ## Split mode: requested explicitly through split_on_rgs or implied by
        ## fastq / gzipped fastq output. An informational text is written to the
        ## placeholder history item and the directory scanned by
        ## discover_datasets is created.
        ## printf is used instead of echo because echo's handling of \n differs
        ## between shells.
        #if $str($mode.split_on_rgs) or $str($mode.oformat)=="fastq" or $str($mode.oformat)=="gz":
            printf "Your input data is now getting processed by MiModD. The output will be split into several files based on the read groups found in the input.\nThis history item will remain in the busy state until the job is finished.\nAfter the job is showing as finished, Galaxy will start adding the results files to your history one by one.\n\nThis may take a while to complete! \n\nYou should refresh your history to see if new files have arrived.\n\nThis message is for your information only and can be deleted from the history once the job has finished.\n" > "$output_split_on_read_groups";

            mkdir converted_data;
        #end if

        mimodd convert

        ## One positional argument per input dataset; paired-end formats
        ## contribute two files per repeat element.
        #for $i in $mode.input_list
            "${i.file1}"
            #if $str($mode.iformat) in ("fastq_pe", "gz_pe"):
                "${i.file2}"
            #end if
        #end for

        ## The sam and bam branches define header as a hidden parameter with the
        ## value "None", which suppresses the option here.
        #if $str($mode.header) != "None":
            --header "${mode.header}"
        #end if

        ## When the single-file output is not available, results are written
        ## into converted_data using the file name prefix read_group.
        #if $str($outputname) == "None":
            --ofile converted_data/read_group
        #else
            --ofile "$outputname"
        #end if
        --iformat ${mode.iformat}
        --oformat ${mode.oformat}
        ${mode.split_on_rgs}
    ]]></command>

    <!--
        The "mode" conditional switches the form on the selected input format.
        Every branch defines the same parameter names (oformat, input_list,
        header, split_on_rgs) so that the command template can reference them
        in all cases. Where a parameter does not apply to a branch it is
        declared as a hidden parameter with a placeholder value.
    -->
    <inputs>
        <conditional name="mode">
            <param help="Your choice will update the interface to display further choices appropriate for your type of input data."
                   label="input file format"
                   name="iformat"
                   type="select">
                <option value="fastq">fastq: single-end (one file)</option>
                <option value="fastq_pe">fastq: paired-end (two files)</option>
                <option value="gz">gzip compressed fastq: single-end (one file)</option>
                <option value="gz_pe">gzip compressed fastq: paired-end (two files)</option>
                <option value="sam">sam</option>
                <option value="bam">bam</option>
            </param>

            <!-- fastq, single-end: one file per repeat element -->
            <when value="fastq">
                <param label="output file format" name="oformat" type="select">
                    <option value="sam">sam</option>
                    <option value="bam">bam</option>
                </param>
                <repeat default="1" min="1" name="input_list" title="fastq input dataset">
                    <param format="fastq" label="inputfile" name="file1" type="data" />
                </repeat>
                <param format="sam"
                       help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."
                       label="Use Header File"
                       name="header"
                       type="data" />
                <param name="split_on_rgs" type="hidden" value="" />
            </when>

            <!-- fastq, paired-end: two files per repeat element -->
            <when value="fastq_pe">
                <param label="output file format" name="oformat" type="select">
                    <option value="sam">sam</option>
                    <option value="bam">bam</option>
                </param>
                <repeat default="1" min="1" name="input_list" title="fastq input datasets">
                    <param format="fastq" label="inputfile with the first set of reads of paired-end data" name="file1" type="data" />
                    <param format="fastq" label="inputfile with the second set of reads of paired-end data" name="file2" type="data" />
                </repeat>
                <param format="sam"
                       help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."
                       label="Use Header File"
                       name="header"
                       type="data" />
                <param name="split_on_rgs" type="hidden" value="" />
            </when>

            <!-- gzipped fastq, single-end: the input is declared with the generic "data" format -->
            <when value="gz">
                <param label="output file format" name="oformat" type="select">
                    <option value="sam">sam</option>
                    <option value="bam">bam</option>
                </param>
                <repeat default="1" min="1" name="input_list" title="fastq.gz input dataset">
                    <param format="data" label="inputfile" name="file1" type="data" />
                </repeat>
                <param format="sam"
                       help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."
                       label="Use Header File"
                       name="header"
                       type="data" />
                <param name="split_on_rgs" type="hidden" value="" />
            </when>

            <!-- gzipped fastq, paired-end: both inputs are declared with the generic "data" format -->
            <when value="gz_pe">
                <param label="output file format" name="oformat" type="select">
                    <option value="sam">sam</option>
                    <option value="bam">bam</option>
                </param>
                <repeat default="1" min="1" name="input_list" title="fastq.gz input datasets">
                    <param format="data" label="inputfile with the first set of reads of paired-end data" name="file1" type="data" />
                    <param format="data" label="inputfile with the second set of reads of paired-end data" name="file2" type="data" />
                </repeat>
                <param format="sam"
                       help="A SAM file with header information, as generated, for example, by the NGS Run Annotation Tool, that will be used to attach metainformation to the results file."
                       label="Use Header File"
                       name="header"
                       type="data" />
                <param name="split_on_rgs" type="hidden" value="" />
            </when>

            <!-- sam: exactly one input file; no header file; optional split on read groups -->
            <when value="sam">
                <param label="output file format" name="oformat" type="select">
                    <option value="bam">bam</option>
                    <option value="sam">sam</option>
                    <option value="fastq">fastq</option>
                    <option value="gz">gzipped fastq</option>
                </param>
                <repeat default="1" max="1" min="1" name="input_list" title="sam input dataset">
                    <param format="sam" label="inputfile" name="file1" type="data" />
                </repeat>
                <param name="header" type="hidden" value="None" />
                <param checked="false"
                       falsevalue=""
                       help="If the input file contains reads from different read groups, write them to separate output files; implied automatically for conversions to fastq and gzipped fastq format"
                       label="Split output based on read group IDs"
                       name="split_on_rgs"
                       truevalue="--split-on-rgs"
                       type="boolean" />
            </when>

            <!-- bam: exactly one input file; no header file; optional split on read groups -->
            <when value="bam">
                <param label="output file format" name="oformat" type="select">
                    <option value="sam">sam</option>
                    <option value="bam">bam</option>
                    <option value="fastq">fastq</option>
                    <option value="gz">gzipped fastq</option>
                </param>
                <repeat default="1" max="1" min="1" name="input_list" title="bam input dataset">
                    <param format="bam" label="inputfile" name="file1" type="data" />
                </repeat>
                <param name="header" type="hidden" value="None" />
                <param checked="false"
                       falsevalue=""
                       help="If the input file contains reads from different read groups, write them to separate output files; implied automatically for conversions to fastq and gzipped fastq format"
                       label="Split output based on read group IDs"
                       name="split_on_rgs"
                       truevalue="--split-on-rgs"
                       type="boolean" />
            </when>
        </conditional>
    </inputs>

    <!--
        The two outputs carry complementary filters, so one of them is created
        per job: "outputname" for a single converted SAM/BAM file, or
        "output_split_on_read_groups" (an informational text item) together
        with the datasets discovered in the converted_data directory.
    -->
    <outputs>
        <data format="bam" label="Converted reads from MiModD ${tool.name} on ${on_string}" name="outputname">
            <change_format>
                <when format="sam" input="mode.oformat" value="sam" />
            </change_format>
            <filter>
                (not mode['split_on_rgs'] and mode['oformat'] not in ("fastq", "gz"))
            </filter>
        </data>

        <data format="txt" label="MiModD ${tool.name} run on ${on_string}" name="output_split_on_read_groups">
            <filter>
                (mode['split_on_rgs'] or mode['oformat'] in ("fastq", "gz"))
            </filter>
            <discover_datasets directory="converted_data" pattern="__designation_and_ext__" visible="true" />
        </data>
    </outputs>

    <!--
        The help text is reStructuredText. It is kept flush-left because
        indentation is significant in reStructuredText.
    -->
    <help>
.. class:: infomark

**What it does**

The tool converts between different file formats used for storing next-generation sequencing data.

As input file types it can handle uncompressed or gzipped fastq, SAM or BAM format. Fastq input (uncompressed or gzipped) can be converted to SAM or BAM format; SAM or BAM input can be converted to SAM, BAM, fastq or gzipped fastq format.

**Notes:**

1) In its standard configuration Galaxy will decompress any .gz files during their upload, so the option to convert gzipped fastq input is useful only with customized Galaxy instances or by using linked files as explained in our `recipe for using gzipped fastq files in Galaxy`_ from the `MiModD user guide`_.

2) The tool can convert fastq files representing data from paired-end sequencing runs to appropriate SAM/BAM format provided that the mate information is split over two fastq files in corresponding order.

**TIP:** If your paired-end data is arranged differently, you may look into the *fastq splitter* and *fastq de-interlacer* tools for Galaxy from the `Fastq Manipulation category`_ of the Galaxy Tool Shed to see if they can convert your files to the expected format.

3) Merging partial fastq (or gzipped fastq) files into a single SAM/BAM file is supported both for single-end and paired-end data. Simply add additional input datasets and select the appropriate files (pairs of files in case of paired-end data).

Concatenation of SAM/BAM files during conversion is currently not supported.

4) For input in fastq format a SAM header file providing run metadata **has to be specified**. The information in this file will be used as the header data of the new SAM/BAM file. You can use the *NGS Run Annotation* tool to generate a new header file for your data.

For input in SAM/BAM format the tool will simply copy the existing header data to the new file. To modify the header of an existing SAM/BAM file, use the *Reheader BAM file* tool instead.

.. _Fastq Manipulation category: https://toolshed.g2.bx.psu.edu/repository/browse_repositories_in_category?id=310ff67d4caf6531
.. _recipe for using gzipped fastq files in Galaxy: http://mimodd.readthedocs.org/en/latest/recipes.html#use-gzipped-fastq-files-in-galaxy
.. _MiModD user guide: http://mimodd.readthedocs.org/en/latest

    </help>
</tool>