|
3
|
1 <?xml version="1.0" encoding="utf-8"?>
|
|
6
|
2 <tool id="crac" name="CRAC" version="1.3.0">
|
|
3
|
<!-- Package dependency declared for this wrapper: crac, version 1.3.0. -->
<requirements>
    <requirement type="package" version="1.3.0">crac</requirement>
</requirements>
|
|
|
6 <description>Analyzing RNAs in high-throughput sequencing data</description>
|
|
|
<!-- Command line handed to crac_wrapper.sh. The index location, the optional
     compression flag and the output dataset's extra-files directory come
     first; the CRAC options built from the tool parameters follow.
     NOTE(review): how crac_wrapper.sh consumes the leading arguments is not
     visible in this file; confirm before reordering them. -->
<command interpreter="bash"> crac_wrapper.sh
    #if $Genome.which_genome == "prebuilt"
        <!--1--> "$Genome.prebuilt_genome.fields.path"
    #else
        <!--1--> "$Genome.index_input.extra_files_path"
    #end if
    $compressed
    <!--2--> $output_name.extra_files_path <!-- Useful for submitting jobs on crac.sh -->
    -r $input -k $kmer_length --read-length $read_length --sam $output_name
    #if $condi_deep_snp.deepSNP == "yes"
        --deep-snv --nb-nucleotides-snv-comparison $condi_deep_snp.nb_nucleotides_snp_comparison
    #end if
    #if $choixSettings.settings == "experimental"
        --max-splice-length $choixSettings.max_splice_length
        --max-bio-indel $choixSettings.max_bio_indel
        --min-duplication $choixSettings.min_duplication
        --max-duplication $choixSettings.max_duplication
        --min-percent-single-loc $choixSettings.min_percent_single_loc
        --min-percent-duplication-loc $choixSettings.min_percent_duplication_loc
        --max-bases-randomly-matched $choixSettings.max_bases_randomly_matched
        <!-- max_bases_retrieved is forwarded so the value entered in the form is actually used -->
        --max-bases-retrieved $choixSettings.max_bases_retrieved
        --max-extension-length $choixSettings.max_extension_length
        --min-support-no-cover $choixSettings.min_support_no_cover
        --min-break-length $choixSettings.min_break_length
    #end if
    $detailed_sam

</command>
|
|
|
34
|
|
|
35 <inputs>
|
|
|
36 <!-- Normal Setting -->
|
|
|
37
|
|
|
<!-- Reference genome selection: either a pre-built index chosen from a data
     table, or a crac_index dataset taken from the user's history. -->
<conditional name="Genome"> <!-- Conditional 3 (Which genome) -->
    <param name="which_genome" type="select" label="Do you want to use a pre-built reference genome or a Crac-index generated genome from your history?" help="Pre-built reference genomes are generated by Crac-index.">
        <option value="prebuilt"> Use a pre-built reference genome </option>
        <option value="history"> Use a Crac-index generated genome from my history</option>
    </param>

    <when value="prebuilt">
        <!-- The command template reads $Genome.prebuilt_genome.fields.path when
             "prebuilt" is selected, so this parameter has to be defined.
             NOTE(review): it is filled from the crac_indexes data table; confirm
             that table is configured wherever the tool is installed. -->
        <param name="prebuilt_genome" type="select" label="Select a reference genome" help="if your genome of interest is not listed - contact authors">
            <options from_data_table="crac_indexes">
                <filter type="sort_by" column="2" />
                <validator type="no_options" message="No indexes are available" />
            </options>
        </param>
    </when>

    <when value="history">
        <param name="index_input" format="crac_index" type="data" label="Reference Genome" help="Select an indexed Genome from your history"/>
    </when>
</conditional>
|
|
|
57
|
|
|
58
|
|
7
|
<!-- Reads dataset given to CRAC through the -r option. -->
<param name="input"
       type="data"
       format="txt,raw,fasta,fastq"
       label="Reads file"
       help="Select a file"/>

<!-- k-mer length given to CRAC through the -k option; the form limits it to 12..32. -->
<param name="kmer_length"
       type="integer"
       min="12"
       max="32"
       value="22"
       label="k-mer length">
    <help>k-mer length must be carefully chosen. A k-mer of that length must map to a unique location in the genome with a high probability. Recommended value for the human genome: 22</help>
</param>

<!-- Fixed read length; the form default is 0. -->
<param name="read_length"
       type="integer"
       value="0"
       label="Read length"
       help="When all reads have the same length, set the read length to dramatically increase computation speed.
 Default value (no read length considered): 0.
 Note : If read length is set, shorter reads will be ignored, longer reads will be cut."/>

<!-- The selected option value is inserted verbatim at the end of the command line. -->
<param name="detailed_sam"
       type="select"
       format="text"
       label="Do you want a detailed sam output file ?"
       help="Detailed sam output file gives you information on the SNPs, splice junctions, sequencing errors, chimeras, ...">
    <option value=""> No, I don't want a detailed SAM output file </option>
    <option value="--detailed-sam"> Yes, I want a detailed SAM output file </option>
</param>

<!-- The selected option value is inserted verbatim right after the index location. -->
<param name="compressed"
       type="select"
       display="radio"
       label="Compress output files?">
    <option value="" selected="true">No</option>
    <option value="--gz">Yes</option>
</param>
|
|
|
<!-- Conditional 1: optional deep SNV search. Choosing "yes" exposes the
     comparison-window size that the command template forwards to CRAC. -->
<conditional name="condi_deep_snp">
    <param name="deepSNP"
           type="select"
           label="Search hard for SNPs?">
        <option value="no" selected="true"> No, do not search hard for SNVs </option>
        <option value="yes"> Yes, search hard for SNVs (takes more time) </option>
    </param>
    <when value="yes">
        <param name="nb_nucleotides_snp_comparison"
               type="integer"
               value="8"
               label="Number of nucleotides for SNV comparison"
               help="Default value for human genome : 8. A smaller value will find more SNVs, but will be less accurate."/>
    </when>
    <!-- Empty branch kept to suppress warnings. -->
    <when value="no"/>
</conditional> <!-- End Conditional 1 -->
|
|
|
85
|
|
|
86 <!-- Experimental Setting-->
|
|
|
<!-- Conditional 2: choice between the normal settings (no extra options) and
     the advanced settings, whose values the command template reads as
     $choixSettings.<name>. -->
<conditional name="choixSettings"> <!-- Conditional 2 (setting choice) -->
    <param name="settings" type="select" label="Advanced CRAC settings to use" help="If you want full control to optimize your experience, use Advanced Settings. Be careful, these settings are experimental and one single change can make Crac fail">
        <option value="normal" selected="true"> Normal settings </option>
        <option value="experimental"> Advanced Settings </option>
    </param>
    <when value="normal"/> <!-- Suppress warnings -->
    <when value="experimental"> <!-- Advanced (experimental) CRAC options -->
        <param name="max_splice_length" type="integer" value="300000" label="Maximum splice length" help="Splices larger than this value, will not be considered as splices, but (if possible) as chimeras. Default value for human genome : 300,000 bp." />
        <param name="max_bio_indel" type="integer" value="15" label="Maximum indel length. Larger indels will be considered as splice junctions" help="Default value for human genome : 15 bp." />
        <param name="min_duplication" type="integer" value="2" label="Minimum duplication occurrence" help=" Minimum number of k-mer occurrences in the genome to be considered as duplicated. Default value for human genome : 2." />
        <param name="max_duplication" type="integer" value="9" label="Maximum duplication occurrence" help=" Maximum number of k-mer occurrences to be considered as duplicated. Default value for human genome : 9." />
        <param name="min_percent_single_loc" type="float" value="0.15" label="Minimum unique location percentage" help=" Minimal percentage of k-mers that must be unique in the genome, to consider the read as unique. Default value for human genome : 0.15." />
        <!-- NOTE(review): the CRAC 1.3.0 usage text reproduced in the help section
             lists DEFAULT 0.15 for this option, not 0.20; confirm which is intended. -->
        <param name="min_percent_duplication_loc" type="float" value="0.20" label="Minimum duplicated location percentage" help=" Minimal percentage of k-mers that must be duplicated in the genome, to consider the read as duplicated. Default value for human genome : 0.20." />
        <!--param name="min_percent_multiple_loc" type="float" value="0.20" label="Minimum percent multiple localisation" help=" HELP. Default value for human genome : 0.20." /-->
        <param name="max_bases_randomly_matched" type="integer" value="10" label="Maximum bases randomly matched" help=" Maximum number of bases that can be considered as randomly matched. Default value for human genome : 10." />
        <!-- NOTE(review): the CRAC 1.3.0 usage text reproduced in the help section
             lists DEFAULT 15 for this option, not 10; confirm which is intended. -->
        <param name="max_bases_retrieved" type="integer" value="10" label="Maximum bases retrieved" help=" Maximum number of bases retrieved from the genome when outputting deletions. Default value for human genome : 10." />
        <param name="max_extension_length" type="integer" value="10" label="Maximum extension length" help=" Maximal number of nucleotides visited to extend a break and to make sure that the location is consistent. Default value for human genome : 10." />
        <param name="min_support_no_cover" type="float" value="1.30" label="Minimum support no cover" help=" Average coverage along the read to consider it as not covered. Default value for human genome : 1.30." />
        <param name="min_break_length" type="float" value="0.5" label="Minimum break length" help=" Breaks shorter than this ratio times the k-mer length will be considered as too short and will be merged if necessary. Default value for human genome : 0.5." />
    </when> <!-- End "when experimental" -->
</conditional> <!-- End Conditional 2 -->
|
|
|
108 </inputs>
|
|
|
109
|
|
|
<!-- Single SAM dataset. The command template passes it to CRAC as the SAM
     output and also hands its extra_files_path to the wrapper script. -->
<outputs>
    <data name="output_name"
          format="sam"
          label="${tool.name} on ${on_string}: mapped reads"/>
</outputs>
|
|
|
113
|
|
|
<!-- Placeholder: the single test case declares no inputs and no expected outputs. -->
<tests>
    <test/>
</tests>
|
|
|
118
|
|
|
119 <help>
|
|
|
120 **What it does**
|
|
|
121
|
|
|
122 CRAC_ proposes a novel way of analyzing reads that integrates genomic locations
|
|
|
123 and local coverage, and delivers all above mentioned predictions in a single
|
|
|
124 step. CRAC uses a double k-mer profiling approach to detect candidate
|
|
|
125 mutations, indels, splice or fusion junctions in each single read.
|
|
|
126
|
|
|
127 .. _CRAC: http://crac.gforge.inria.fr/
|
|
|
128
|
|
|
129 If you use this tool, please cite:
|
|
|
130 - Philippe N., Salson M., Commes T., Rivals E., `"CRAC: an integrated approach to the analysis of RNA-seq reads"`__, Genome Biology (2013), 14:R30, doi: 10.1186/gb-2013-14-3-r30.
|
|
|
131
|
|
|
132 .. __: http://genomebiology.com/2013/14/3/R30/
|
|
|
133
|
|
|
134 ------
|
|
|
135
|
|
|
136 **Input formats**
|
|
|
137
|
|
|
138 CRAC accepts files in FASTA, FASTQ or any text format (txt, raw, ...).
|
|
|
139
|
|
|
140 ------
|
|
|
141
|
|
|
142 **Output**
|
|
|
143
|
|
|
144 The output is in SAM format. If you choose the detailed SAM output, CRAC adds several flags that give more information. You can see the details here: http://crac.gforge.inria.fr/index.php?id=sam-documentation
|
|
|
145
|
|
|
146
|
|
|
147 ------
|
|
|
148
|
|
|
149 **Crac settings**
|
|
|
150
|
|
|
151 Main options are displayed at the top of the page. If you're an experienced user, you can choose to display
|
|
|
152 all of the Crac settings. Most of the options in Crac have been implemented here.
|
|
|
153
|
|
|
154 ------
|
|
|
155 crac 1.3.0 Compiled on Sep 13 2013.
|
|
|
156
|
|
4
|
157 -h, --help <none> print this help and exit
|
|
|
158 -f, --full-help <none> print a complete help and exit
|
|
|
159 -v <none> print version and exit
|
|
3
|
160
|
|
|
161 Mandatory arguments
|
|
4
|
162 -i <FILE> set genome index file (without the extension filename)
|
|
|
163 -r <FILE> [FILE2] set read file. Specify FILE2 in case of paired-end reads
|
|
|
164 -k <INT> set k-mer length
|
|
|
165 -o, --sam <FILE> set SAM output filename or print on STDOUT with "-o -" argument
|
|
3
|
166
|
|
|
167 Optional arguments
|
|
|
168 * Protocol
|
|
4
|
169 --stranded <none> set the read mapping with for a strand specific library (DEFAULT non-strand specific)
|
|
3
|
170
|
|
|
171 * Efficiency
|
|
4
|
172 --nb-threads <INT> set the number of worker threads (DEFAULT 1)
|
|
|
173 --read-length, -m <INT> set read length in case of all reads have the same length to optimize
|
|
3
|
174 CPU and memory times
|
|
4
|
175 --treat-multiple <none> consider alignments with multiple locations (>max-duplication) rather than considering a no-alignment in the SAM file
|
|
|
176 --max-locs <INT> set the maximum number of locations on the reference index (DEFAULT 300)
|
|
3
|
177
|
|
|
178 * Accuracy
|
|
4
|
179 --no-ambiguity <none> discard biological events (splice, snv, indel, chimera) which have several matches on the reference index
|
|
3
|
180
|
|
|
181
|
|
|
182 Optional output arguments
|
|
4
|
183 --all <FILE> set output base filename for all causes following
|
|
|
184 --gz <none> all output files specified after this argument are gzipped
|
|
3
|
185
|
|
|
186 * Summary and statistics
|
|
4
|
187 --summary <FILE> set output summary file
|
|
3
|
188 * Mapping
|
|
4
|
189 --single <FILE> set output single file
|
|
|
190 --duplicate <FILE> set output duplication file
|
|
|
191 --multiple <FILE> set output multiple file
|
|
|
192 --none <FILE> set output none file
|
|
|
193 --normal <FILE> set output normal file
|
|
|
194 --almost-normal <FILE> set output almost normal file
|
|
3
|
195
|
|
|
196 * Biological causes
|
|
4
|
197 --snv <FILE> set output SNV file
|
|
|
198 --indel <FILE> set output short indel file
|
|
|
199 --splice <FILE> set output splice junction file
|
|
|
200 --weak-splice <FILE> set output coverless splice junction file
|
|
|
201 --chimera <FILE> set output chimera junction file
|
|
|
202 --paired-end-chimera <FILE> set output for paired-end chimera file
|
|
|
203 --biological <FILE> set output bio-undetermined file
|
|
3
|
204
|
|
|
205 * Sequence errors
|
|
4
|
206 --errors <FILE> set output sequence errors file
|
|
3
|
207
|
|
|
208 * Repetition
|
|
4
|
209 --repeat <FILE> set output repetition file
|
|
3
|
210
|
|
|
211 * Other causes
|
|
4
|
212 --undetermined <FILE> set output undetermined file
|
|
|
213 --nothing <FILE> set output nothing file
|
|
3
|
214
|
|
|
215 Optional process for specific research
|
|
4
|
216 --deep-snv <none> will search hard to find SNPs
|
|
|
217 --stringent-chimera <none> will search chimeras with more accuracy (but less sensitivity)
|
|
3
|
218
|
|
|
219 Optional process launcher (once must be selected)
|
|
|
220 * Exact matching tool
|
|
4
|
221 --emt <none> launch CRAC-emt for exact mapping of short reads
|
|
3
|
222
|
|
|
223 * Server tool (for debugging)
|
|
4
|
224 --server <none> launch CRAC server,the output arguments will
|
|
3
|
225 not be taken into account
|
|
4
|
226 --input-name-server <STRING> DEFAULT classify.fifo
|
|
|
227 --output-name-server <STRING> DEFAULT classify.out.fifo
|
|
3
|
228
|
|
|
229 Additional settings for users
|
|
|
230 * Sam output file
|
|
4
|
231 --detailed-sam <none> more informations are added in SAM output file
|
|
3
|
232
|
|
|
233 * Mapping
|
|
4
|
234 --min-percent-single-loc <FLOAT> DEFAULT 0.15
|
|
|
235 --min-duplication <INT> DEFAULT 2
|
|
|
236 --max-duplication <INT> DEFAULT 9
|
|
|
237 --min-percent-duplication-loc <FLOAT> DEFAULT 0.15
|
|
|
238 --min-percent-multiple-loc <FLOAT> DEFAULT 0.50
|
|
|
239 --min-repetition <INT> DEFAULT 20
|
|
|
240 --min-percent-repetition-loc <FLOAT> DEFAULT 0.20
|
|
3
|
241 * Biological causes
|
|
4
|
242 --max-splice-length <INT> DEFAULT 300000
|
|
|
243 --max-paired-end-length <INT> DEFAULT 300000
|
|
|
244 --max-bio-indel <INT> DEFAULT 15
|
|
|
245 --max-bases-retrieved <INT> DEFAULT 15
|
|
3
|
246 * Undetermined
|
|
4
|
247 --min-support-no-cover <FLOAT> DEFAULT 1.30
|
|
3
|
248
|
|
|
249 Additional settings for advanced users
|
|
|
250 * Break verification and fusion (merging mirage breaks)
|
|
4
|
251 --min-break-length <FLOAT> DEFAULT 0.50
|
|
|
252 --max-bases-randomly-matched <INT> DEFAULT 10
|
|
|
253 --max-extension-length <INT> DEFAULT 10
|
|
3
|
254
|
|
|
255 * Threading
|
|
4
|
256 --nb-tags-info-stored <INT> DEFAULT 1000
|
|
3
|
257
|
|
|
258 * Deep SNV search option
|
|
4
|
259 --nb-nucleotides-snv-comparison <INT> DEFAULT 8
|
|
3
|
260 </help>
|
|
|
261
|
|
|
262 </tool>
|