annotate sequel_wrapper.xml @ 0:33f282b9dafe draft

Uploaded
author soranzo
date Fri, 18 Jul 2014 07:07:31 -0400
parents
children 9fee466ff7b0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
1 <tool id="sequel_wrapper" name="SEQuel" version="0.2">
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
2 <description></description>
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
3 <requirements>
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
4 <requirement type="package" version="0.6.2">bwa</requirement>
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
5 <requirement type="package" version="35x1">blat</requirement>
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
6 <requirement type="package" version="1.0.2">sequel</requirement>
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
7 </requirements>
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
8 <command interpreter="python">
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
9 sequel_wrapper.py -t \${GALAXY_SLOTS:-8} -p \${GALAXY_SLOTS:-8} -u 1
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
10 --sequel_jar_path=\$SEQUEL_JAR_PATH --read1=$read1 --read2=$read2 --contigs=$contigs
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
11 #if str($bases_length)
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
12 --bases_length=$bases_length
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
13 #end if
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
14 #if str($kmer_size)
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
15 --kmer_size=$kmer_size
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
16 #end if
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
17 #if str($max_positional_error)
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
18 --max_positional_error=$max_positional_error
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
19 #end if
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
20 #if str($min_fraction)
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
21 --min_fraction=$min_fraction
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
22 #end if
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
23 #if str($min_aln_length)
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
24 --min_aln_length=$min_aln_length
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
25 #end if
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
26 #if str($min_avg_coverage)
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
27 --min_avg_coverage=$min_avg_coverage
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
28 #end if
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
29 #if str($discard_kmers)
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
30 --discard_kmers=$discard_kmers
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
31 #end if
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
32 #if str($discard_positional)
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
33 --discard_positional=$discard_positional
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
34 #end if
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
35 #if str($min_aln_score)
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
36 --min_aln_score=$min_aln_score
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
37 #end if
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
38 #if $single_cell_mode
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
39 --single_cell_mode
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
40 #end if
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
41 #if $report_changes
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
42 --report_changes
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
43 #end if
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
44 #if $extend_contig
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
45 --extend_contig
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
46 #end if
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
47 #if $reference_genome
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
48 --reference_genome=$reference_genome
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
49 #end if
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
50 --contigs_refined=$contigs_refined
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
51 --logprep=$logprep
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
52 --logseq=$logseq
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
53 --logfile_prep=$logfile_prep
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
54 --logfile_seq=$logfile_seq
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
55 </command>
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
56
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
57 <inputs>
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
58 <param name="read1" type="data" format="fasta,fastq" label="Paired-end reads 1 from sequencing (-r1)" help="FASTA or FASTQ format" />
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
59 <param name="read2" type="data" format="fasta,fastq" label="Paired-end reads 2 from sequencing (-r2)" help="FASTA or FASTQ format" />
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
60 <param name="contigs" type="data" format="fasta,fastq" label="Contigs from assembly (-c)" help="FASTA or FASTQ format" />
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
61
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
62 <param name="bases_length" type="integer" value="0" optional="true" label="Preprocessing: do not refine contigs shorter than n bases (-l)" help="Contigs shorter than n bases will appear unchanged in the final output file" />
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
63
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
64 <param name="kmer_size" type="integer" value="50" optional="true" label="K-mer size (-k)" help="" />
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
65
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
66 <param name="max_positional_error" type="integer" value="25" optional="true" label="Max positional error Delta (-d)" help="" />
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
67
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
68 <param name="min_fraction" type="float" value="0.9" optional="true" label="Min fraction of matches in alignment (-f)" help="" />
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
69
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
70 <param name="min_aln_length" type="integer" value="" optional="true" label="Min alignment length (-l)" help="bp or fraction of contig. Optional." />
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
71
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
72 <param name="min_avg_coverage" type="float" value="20.0" optional="true" label="Min average coverage to incorporate changes (-v)" help="" />
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
73
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
74 <param name="discard_kmers" type="integer" value="1" optional="true" label="Discard k-mers observed less than m times (-m)" help="" />
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
75
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
76 <param name="discard_positional" type="integer" value="1" optional="true" label="Discard positional k-mers observed less than n times (-n)" help="" />
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
77
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
78 <param name="min_aln_score" type="integer" value="1" optional="true" label="Min alignment score (MAPQ) of reads to consider (-q)" help="" />
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
79
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
80 <param name="single_cell_mode" type="boolean" checked="false" label="Single cell mode, sort partial-contigs by coverage (-s)" />
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
81
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
82 <param name="report_changes" type="boolean" checked="false" label="Report changes (slow) for all input-contigs (-r)" />
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
83
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
84 <param name="extend_contig" type="boolean" checked="false" label="Extend contig with flanking regions of alignment (-e)" />
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
85
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
86 <param name="reference_genome" type="data" format="fasta,twobit" optional="true" label="Evaluate refinement using reference genome (-g)" help="FASTA or 2bit format" />
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
87 </inputs>
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
88
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
89 <outputs>
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
90 <data name="logfile_prep" format="txt" label="${tool.name} on ${on_string}: log (pre-processing)" />
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
91 <data name="logfile_seq" format="txt" label="${tool.name} on ${on_string}: log (SEQuel)" />
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
92 <data name="logprep" format="txt" label="${tool.name} on ${on_string}: log (pre-processing, official)" />
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
93 <data name="logseq" format="txt" label="${tool.name} on ${on_string}: log (SEQuel, official)" />
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
94 <data name="contigs_refined" format="fasta" label="${tool.name} on ${on_string}: refined contigs" />
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
95 </outputs>
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
96
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
97 <tests>
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
98
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
99 </tests>
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
100 <help>
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
101 **What it does**
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
102
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
103 SEQuel is a tool for correcting errors (i.e., insertions, deletions, and substitutions) in contigs output from assembly. While assemblies of next-generation sequencing (NGS) data are accurate, they still contain a substantial number of errors that need to be corrected after the assembly process. The algorithm behind SEQuel makes use of a graph structure called the positional de Bruijn graph, which models k-mers within reads while incorporating their approximate positions into the model.
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
104
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
105 SEQuel substantially reduces the number of small insertion, deletion and substitution errors in assemblies of both standard (multi-cell) and single-cell sequencing data. SEQuel was tested mainly on Illumina sequence data, in combination with multiple NGS assemblers, such as Euler-SR, Velvet, SOAPdenovo, ALLPATHS and SPAdes.
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
106
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
107 **Known issues**
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
108
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
109 .. class:: warningmark
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
110
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
111 During the pre-processing stage, one SAM file per contig is created. Due to runtime considerations, all of these files are kept open simultaneously; as a result, the program will crash when the number of contigs in the assembly is too high.
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
112
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
113 **License and citation**
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
114
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
115 This Galaxy tool is Copyright © 2013-2014 `CRS4 Srl.`_ and is released under the `MIT license`_.
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
116
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
117 .. _CRS4 Srl.: http://www.crs4.it/
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
118 .. _MIT license: http://opensource.org/licenses/MIT
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
119
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
120 You can use this tool only if you agree to the license terms of: `SEQuel`_.
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
121
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
122 .. _SEQuel: http://bix.ucsd.edu/SEQuel/
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
123
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
124 If you use this tool, please cite:
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
125
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
126 - |Cuccuru2014|_
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
127 - |Ronen2012|_.
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
128
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
129 .. |Cuccuru2014| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2014) Orione, a web-based framework for NGS analysis in microbiology. *Bioinformatics* 30(13), 1928-1929
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
130 .. _Cuccuru2014: http://bioinformatics.oxfordjournals.org/content/30/13/1928
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
131 .. |Ronen2012| replace:: Ronen R., *et al.* (2012) SEQuel: improving the accuracy of genome assemblies. *Bioinformatics* 28(12), i188-i196
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
132 .. _Ronen2012: http://bioinformatics.oxfordjournals.org/content/28/12/i188
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
133 </help>
33f282b9dafe Uploaded
soranzo
parents:
diff changeset
134 </tool>