comparison defuse.xml @ 19:1af6f32ff592

Add datamanager, move to defuse_reference.loc
author Jim Johnson <jj@umn.edu>
date Fri, 21 Jun 2013 14:46:11 -0500
parents 547d8db4673e
children 68494d6aabeb
comparison
equal deleted inserted replaced
18:547d8db4673e 19:1af6f32ff592
10 <command interpreter="command"> /bin/bash $shscript </command> 10 <command interpreter="command"> /bin/bash $shscript </command>
11 <inputs> 11 <inputs>
12 <param name="left_pairendreads" type="data" format="fastq" label="left part of read pairs" help="The left and right reads pairs must be in the same order, and not have any unpaired reads. (FASTQ interlacer will pair reads and remove the unpaired. FASTQ de-interlacer will separate the result into left and right reads.)"/> 12 <param name="left_pairendreads" type="data" format="fastq" label="left part of read pairs" help="The left and right reads pairs must be in the same order, and not have any unpaired reads. (FASTQ interlacer will pair reads and remove the unpaired. FASTQ de-interlacer will separate the result into left and right reads.)"/>
13 <param name="right_pairendreads" type="data" format="fastq" label="right part of read pairs" help="In the same order as the left reads"/> 13 <param name="right_pairendreads" type="data" format="fastq" label="right part of read pairs" help="In the same order as the left reads"/>
14 <conditional name="refGenomeSource"> 14 <conditional name="refGenomeSource">
15 <param name="genomeSource" type="select" label="Will you select a built-in DeFuse Reference Dataset, or supply a configuration from your history" help=""> 15 <param name="genomeSource" type="select" label="Will you select a built-in DeFuse Reference Dataset, or supply a configuration from your history" help="">
16 <option value="indexed">Use a built-in DeFuse Reference Dataset</option> 16 <option value="indexed">Use a built-in DeFuse Reference Dataset</option>
17 <option value="history">Use a configuration from your history that specifies the DeFuse Reference Dataset</option> 17 <option value="history">Use a configuration from your history that specifies the DeFuse Reference Dataset</option>
18 </param>
19 <when value="indexed">
20 <param name="index" type="select" label="Select a Reference Dataset" help="if your genome of interest is not listed - contact Galaxy team">
21 <options from_file="defuse_reference.loc">
22 <column name="name" index="1"/>
23 <column name="value" index="2"/>
24 <filter type="sort_by" column="0" />
25 <validator type="no_options" message="No indexes are available" />
26 </options>
18 </param> 27 </param>
19 <when value="indexed"> 28 </when>
20 <param name="index" type="select" label="Select a Reference Dataset" help="if your genome of interest is not listed - contact Galaxy team"> 29 <when value="history">
21 <options from_file="defuse.loc"> 30 <param name="config" type="data" format="defuse.conf" label="Defuse Config file" help=""/>
22 <column name="name" index="1"/> 31 </when> <!-- history -->
23 <column name="value" index="2"/>
24 <filter type="sort_by" column="0" />
25 <validator type="no_options" message="No indexes are available" />
26 </options>
27 </param>
28 <conditional name="defuse_param">
29 <param name="settings" type="select" label="Defuse parameter settings" help="">
30 <option value="preSet">Default settings</option>
31 <option value="full">Full parameter list</option>
32 </param>
33 <when value="preSet" />
34 <when value="full">
35 <param name="max_insert_size" type="integer" value="500" optional="true" label="Bowtie max_insert_size" />
36 <param name="dna_concordant_length" type="integer" value="2000" optional="true" label="Minimum gene fusion range dna_concordant_length" />
37 <param name="discord_read_trim" type="integer" value="50" optional="true" label="Trim length for discordant reads discord_read_trim" help="(split reads are not trimmed)" />
38 <param name="clustering_precision" type="float" value=".95" optional="true" label="Filter clustering_precision">
39 <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/>
40 </param>
41 <param name="span_count_threshold" type="integer" value="5" optional="true" label="Filter span_count_threshold" />
42 <param name="split_count_threshold" type="integer" value="3" optional="true" label="Filter split_count_threshold" />
43 <param name="percent_identity_threshold" type="float" value=".90" optional="true" label="Filter percent_identity_threshold">
44 <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/>
45 </param>
46 <param name="max_dist_pos" type="integer" value="600" optional="true" label="Filter max_dist_pos" />
47 <param name="num_dist_genes" type="integer" value="500" optional="true" label="Filter num_dist_genes" />
48 <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" />
49 <param name="max_concordant_ratio" type="float" value="0.1" optional="true" label="Filter max_concordant_ratio">
50 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
51 </param>
52 <param name="splice_bias" type="integer" value="10" optional="true" label="Filter splice_bias" />
53 <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold">
54 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
55 </param>
56 <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density">
57 <help>Position density when calculating covariance</help>
58 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
59 </param>
60 <param name="denovo_assembly" type="select" label="denovo_assembly" help="">
61 <option value="">Use Default</option>
62 <option value="no">no</option>
63 <option value="yes">yes</option>
64 </param>
65 <!--
66 <param name="positive_controls" type="data" format="txt" optional=true label="Defuse positive_controls" help=""/>
67 -->
68 </when> <!-- full -->
69 </conditional> <!-- defuse_param -->
70 </when>
71 <when value="history">
72 <param name="config" type="data" format="txt" label="Defuse Config file" help=""/>
73 </when> <!-- history -->
74 </conditional> <!-- refGenomeSource --> 32 </conditional> <!-- refGenomeSource -->
33 <conditional name="defuse_param">
34 <param name="settings" type="select" label="Defuse parameter settings" help="">
35 <option value="preSet">Default settings</option>
36 <option value="full">Full parameter list</option>
37 </param>
38 <when value="preSet" />
39 <when value="full">
40 <param name="max_insert_size" type="integer" value="500" optional="true" label="Bowtie max_insert_size" />
41 <param name="dna_concordant_length" type="integer" value="2000" optional="true" label="Minimum gene fusion range dna_concordant_length" />
42 <param name="discord_read_trim" type="integer" value="50" optional="true" label="Trim length for discordant reads discord_read_trim" help="(split reads are not trimmed)" />
43 <param name="calculate_extra_annotations" type="select" label="Calculate extra annotations, fusion splice index and interrupted index" help="">
44 <option value="">Use Default</option>
45 <option value="no">no</option>
46 <option value="yes">yes</option>
47 </param>
48 <param name="clustering_precision" type="float" value=".95" optional="true" label="Filter clustering_precision">
49 <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/>
50 </param>
51 <param name="span_count_threshold" type="integer" value="5" optional="true" label="Filter span_count_threshold" />
52 <param name="percent_identity_threshold" type="float" value=".90" optional="true" label="Filter percent_identity_threshold">
53 <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/>
54 </param>
55 <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" />
56 <param name="splice_bias" type="integer" value="10" optional="true" label="Filter splice_bias" />
57 <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold">
58 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
59 </param>
60 <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density">
61 <help>Position density when calculating covariance</help>
62 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
63 </param>
64 <param name="denovo_assembly" type="select" label="denovo_assembly" help="">
65 <option value="">Use Default</option>
66 <option value="no">no</option>
67 <option value="yes">yes</option>
68 </param>
69 <!--
70 <param name="positive_controls" type="data" format="txt" optional=true label="Defuse positive_controls" help=""/>
71 -->
72 <param name="reads_per_job" type="integer" value="1000000" optional="true" label="Number of reads for each job in split" />
73 </when> <!-- full -->
74 </conditional> <!-- defuse_param -->
75 <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files" 75 <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files"
76 help="The defuse output working directory can be helpful for determining errors that may have occurred during the run, 76 help="The defuse output working directory can be helpful for determining errors that may have occurred during the run,
77 but they require considerable diskspace, and should be deleted and purged when no longer needed."/> 77 but they require considerable diskspace, and should be deleted and purged when no longer needed."/>
78 <param name="do_get_reads" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Run get_reads on each cluster"/> 78 <param name="do_get_reads" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Run get_reads on each cluster"/>
79 </inputs> 79 </inputs>
81 <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/> 81 <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/>
82 <data format="txt" name="defuse_log" label="${tool.name} on ${on_string}: defuse.log" /> 82 <data format="txt" name="defuse_log" label="${tool.name} on ${on_string}: defuse.log" />
83 <data format="html" name="defuse_out" label="${tool.name} on ${on_string}: defuse_output (purge when no longer needed)"> 83 <data format="html" name="defuse_out" label="${tool.name} on ${on_string}: defuse_output (purge when no longer needed)">
84 <filter>keep_output == True</filter> 84 <filter>keep_output == True</filter>
85 </data> 85 </data>
86 <data format="tabular" name="results_tsv" label="${tool.name} on ${on_string}: results.tsv" />
87 <data format="tabular" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" /> 86 <data format="tabular" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" />
88 <data format="tabular" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" /> 87 <data format="tabular" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" />
89 <data format="html" name="fusion_reads" label="${tool.name} on ${on_string}: fusion_reads"> 88 <data format="html" name="fusion_reads" label="${tool.name} on ${on_string}: fusion_reads">
90 <filter>do_get_reads == True</filter> 89 <filter>do_get_reads == True</filter>
91 </data> 90 </data>
91 <!--
92 expression_plot
93 circos plot
94 -->
92 </outputs> 95 </outputs>
93 <configfiles> 96 <configfiles>
94 <configfile name="defuse_config"> 97 <configfile name="defuse_config">
95 #import ast 98 #import re
99 #set $ds = chr(36)
96 #if $refGenomeSource.genomeSource == "history": 100 #if $refGenomeSource.genomeSource == "history":
97 #include raw $refGenomeSource.config.__str__ 101 #set config_file = $refGenomeSource.config.__str__
102 #set
98 #else 103 #else
99 #set $ref_dict = dict($ast.literal_eval($refGenomeSource.index.value)) 104 #set config_file = $refGenomeSource.index.value
105 #end if
106 #set pat = '^\s*([^#=][^=]*?)\s*=\s*(.*?)\s*$'
107 #set fh = open()
108 #set keys = ['dataset_directory','ensembl_organism','ensembl_prefix','ensembl_version','ensembl_genome_version','ucsc_genome_version','ncbi_organism','ncbi_prefix','chromosomes','mt_chromosome','gene_sources','ig_gene_sources','rrna_gene_sources']
109 #set kv = []
110 #for $line in $fh:
111 #set m = $re.match($pat,$line)
112 #if $m and len($m.groups()) == 2:
113 ## #echo $line
114 #if $m.groups()[0] in keys:
115 #set k = $m.groups()[0]
116 #if k == 'dataset_directory' and $refGenomeSource.genomeSource == "indexed":
117 ## The DataManager is configured to place the config file in the same directory as the defuse_data: dataset_directory
118 #set v = $os.path.dirname($config_file)
119 #else:
120 #set v = $m.groups()[1]
121 #end if
122 #set kv = $kv + [[$k, $v]]
123 #end if
124 #end if
125 #end for
126 ## #echo $kv
127 #set ref_dict = dict($kv)
128 ## #echo $ref_dict
129 ## include raw $refGenomeSource.config.__str__
100 # 130 #
101 # Configuration file for defuse 131 # Configuration file for defuse
102 # 132 #
103 # At a minimum, change all values enclused by [] 133 # At a minimum, change all values enclused by []
104 # 134 #
105 135
106 # Directory where the defuse code was unpacked 136 # Directory where the defuse code was unpacked
107 ## Default location in the tool/defuse directory 137 ## Default location in the tool/defuse directory
108 # source_directory = ${__root_dir__}/tools/defuse 138 # source_directory = ${__root_dir__}/tools/defuse
109 source_directory = #slurp 139 source_directory = __DEFUSE_PATH__
110 #try
111 $ref_dict['source_directory']
112 #except
113 __DEFUSE_PATH__
114 #end try
115 140
116 # Directory where you want your dataset 141 # Directory where you want your dataset
117 dataset_directory = #slurp 142 dataset_directory = #slurp
118 #try 143 #try
119 $ref_dict['dataset_directory'] 144 $ref_dict['dataset_directory']
164 #except 189 #except
165 \$(dataset_directory)/Hs.seq.uniq 190 \$(dataset_directory)/Hs.seq.uniq
166 #end try 191 #end try
167 192
168 # Paths to external tools 193 # Paths to external tools
169 bowtie_bin = #slurp 194 bowtie_bin = __BOWTIE_BIN__
170 #try 195 bowtie_build_bin = __BOWTIE_BUILD_BIN__
171 $ref_dict['bowtie_bin'] 196 blat_bin = __BLAT_BIN__
172 #except 197 fatotwobit_bin = __FATOTWOBIT_BIN__
173 __BOWTIE_BIN__ 198 gmap_bin = __GMAP_BIN__
174 #end try 199 gmap_bin = __GMAP_BIN__
175 bowtie_build_bin = #slurp 200 gmap_setup_bin = __GMAP_SETUP_BIN__
176 #try 201 r_bin = __R_BIN__
177 $ref_dict['bowtie_build_bin'] 202 rscript_bin = __RSCRIPT_BIN__
178 #except
179 __BOWTIE_BUILD_BIN__
180 #end try
181 blat_bin = #slurp
182 #try
183 $ref_dict['blat_bin']
184 #except
185 __BLAT_BIN__
186 #end try
187 fatotwobit_bin = #slurp
188 #try
189 $ref_dict['fatotwobit_bin']
190 #except
191 __FATOTWOBIT_BIN__
192 #end try
193 gmap_bin = #slurp
194 #try
195 $ref_dict['gmap_bin']
196 #except
197 __GMAP_BIN__
198 #end try
199 gmap_bin = #slurp
200 #try
201 $ref_dict['gmap_bin']
202 #except
203 __GMAP_BIN__
204 #end try
205 gmap_setup_bin = #slurp
206 #try
207 $ref_dict['gmap_setup_bin']
208 #except
209 __GMAP_SETUP_BIN__
210 #end try
211 r_bin = #slurp
212 #try
213 $ref_dict['r_bin']
214 #except
215 __R_BIN__
216 #end try
217 rscript_bin = #slurp
218 #try
219 $ref_dict['rscript_bin']
220 #except
221 __RSCRIPT_BIN__
222 #end try
223 203
224 # Directory where you want your dataset 204 # Directory where you want your dataset
225 gmap_index_directory = #slurp 205 gmap_index_directory = #slurp
226 #try 206 #try
227 $ref_dict['gmap_index_directory'] 207 $ref_dict['gmap_index_directory']
282 $ref_dict['bowtie_quals'] 262 $ref_dict['bowtie_quals']
283 #except 263 #except
284 --phred33-quals 264 --phred33-quals
285 #end try 265 #end try
286 max_insert_size = #slurp 266 max_insert_size = #slurp
287 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_insert_size.__str__ != "": 267 #if $defuse_param.settings == "full" and $defuse_param.max_insert_size.__str__ != "":
288 $refGenomeSource.defuse_param.max_insert_size 268 $defuse_param.max_insert_size
289 #else 269 #else
290 #try 270 #try
291 $ref_dict['max_insert_size'] 271 $ref_dict['max_insert_size']
292 #except 272 #except
293 500 273 500
334 10000 314 10000
335 #end try 315 #end try
336 316
337 # Minimum gene fusion range 317 # Minimum gene fusion range
338 dna_concordant_length = #slurp 318 dna_concordant_length = #slurp
339 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.dna_concordant_length.__str__ != "": 319 #if $defuse_param.settings == "full" and $defuse_param.dna_concordant_length.__str__ != "":
340 $refGenomeSource.defuse_param.dna_concordant_length 320 $defuse_param.dna_concordant_length
341 #else 321 #else
342 #try 322 #try
343 $ref_dict['dna_concordant_length'] 323 $ref_dict['dna_concordant_length']
344 #except 324 #except
345 2000 325 2000
346 #end try 326 #end try
347 #end if 327 #end if
348 328
349 # Trim length for discordant reads (split reads are not trimmed) 329 # Trim length for discordant reads (split reads are not trimmed)
350 discord_read_trim = #slurp 330 discord_read_trim = #slurp
351 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.discord_read_trim.__str__ != "": 331 #if $defuse_param.settings == "full" and $defuse_param.discord_read_trim.__str__ != "":
352 $refGenomeSource.defuse_param.discord_read_trim 332 $defuse_param.discord_read_trim
353 #else 333 #else
354 #try 334 #try
355 $ref_dict['discord_read_trim'] 335 $ref_dict['discord_read_trim']
356 #except 336 #except
357 50 337 50
358 #end try 338 #end try
359 #end if 339 #end if
360 340 # Calculate extra annotations, fusion splice index and interrupted index
341 calculate_extra_annotations = #slurp
342 #if $defuse_param.settings == "full" and $defuse_param.calculate_extra_annotations.__str__ != "":
343 $defuse_param.calculate_extra_annotations
344 #else
345 #try
346 $ref_dict['calculate_extra_annotations']
347 #except
348 no
349 #end try
350 #end if
361 # Filtering parameters 351 # Filtering parameters
362 clustering_precision = #slurp 352 clustering_precision = #slurp
363 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.clustering_precision.__str__ != "" 353 #if $defuse_param.settings == "full" and $defuse_param.clustering_precision.__str__ != ""
364 $refGenomeSource.defuse_param.clustering_precision 354 $defuse_param.clustering_precision
365 #else 355 #else
366 #try 356 #try
367 $ref_dict['clustering_precision'] 357 $ref_dict['clustering_precision']
368 #except 358 #except
369 0.95 359 0.95
370 #end try 360 #end try
371 #end if 361 #end if
372 span_count_threshold = #slurp 362 span_count_threshold = #slurp
373 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.span_count_threshold.__str__ != "" 363 #if $defuse_param.settings == "full" and $defuse_param.span_count_threshold.__str__ != ""
374 $refGenomeSource.defuse_param.span_count_threshold 364 $defuse_param.span_count_threshold
375 #else 365 #else
376 #try 366 #try
377 $ref_dict['span_count_threshold'] 367 $ref_dict['span_count_threshold']
378 #except 368 #except
379 5 369 5
380 #end try 370 #end try
381 #end if 371 #end if
382 split_count_threshold = #slurp
383 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.split_count_threshold.__str__ != ""
384 $refGenomeSource.defuse_param.split_count_threshold
385 #else
386 #try
387 $ref_dict['split_count_threshold']
388 #except
389 3
390 #end try
391 #end if
392 percent_identity_threshold = #slurp 372 percent_identity_threshold = #slurp
393 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.percent_identity_threshold.__str__ != "" 373 #if $defuse_param.settings == "full" and $defuse_param.percent_identity_threshold.__str__ != ""
394 $refGenomeSource.defuse_param.percent_identity_threshold 374 $defuse_param.percent_identity_threshold
395 #else 375 #else
396 #try 376 #try
397 $ref_dict['percent_identity_threshold'] 377 $ref_dict['percent_identity_threshold']
398 #except 378 #except
399 0.90 379 0.90
400 #end try 380 #end try
401 #end if 381 #end if
402 max_dist_pos = #slurp
403 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_dist_pos.__str__ != ""
404 $refGenomeSource.defuse_param.max_dist_pos
405 #else
406 #try
407 $ref_dict['max_dist_pos']
408 #except
409 600
410 #end try
411 #end if
412 num_dist_genes = #slurp
413 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.num_dist_genes.__str__ != ""
414 $refGenomeSource.defuse_param.num_dist_genes
415 #else
416 #try
417 $ref_dict['num_dist_genes']
418 #except
419 500
420 #end try
421 #end if
422 split_min_anchor = #slurp 382 split_min_anchor = #slurp
423 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.split_min_anchor.__str__ != "" 383 #if $defuse_param.settings == "full" and $defuse_param.split_min_anchor.__str__ != ""
424 $refGenomeSource.defuse_param.split_min_anchor 384 $defuse_param.split_min_anchor
425 #else 385 #else
426 #try 386 #try
427 $ref_dict['split_min_anchor'] 387 $ref_dict['split_min_anchor']
428 #except 388 #except
429 4 389 4
430 #end try 390 #end try
431 #end if 391 #end if
432 max_concordant_ratio = #slurp
433 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_concordant_ratio.__str__ != ""
434 $refGenomeSource.defuse_param.max_concordant_ratio
435 #else
436 #try
437 $ref_dict['max_concordant_ratio']
438 #except
439 0.1
440 #end try
441 #end if
442 splice_bias = #slurp 392 splice_bias = #slurp
443 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.splice_bias.__str__ != "" 393 #if $defuse_param.settings == "full" and $defuse_param.splice_bias.__str__ != ""
444 $refGenomeSource.defuse_param.splice_bias 394 $defuse_param.splice_bias
445 #else 395 #else
446 #try 396 #try
447 $ref_dict['splice_bias'] 397 $ref_dict['splice_bias']
448 #except 398 #except
449 10 399 10
450 #end try 400 #end try
451 #end if 401 #end if
452 denovo_assembly = #slurp 402 denovo_assembly = #slurp
453 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.denovo_assembly.__str__ != "" 403 #if $defuse_param.settings == "full" and $defuse_param.denovo_assembly.__str__ != ""
454 $refGenomeSource.defuse_param.denovo_assembly 404 $defuse_param.denovo_assembly
455 #else 405 #else
456 #try 406 #try
457 $ref_dict['denovo_assembly'] 407 $ref_dict['denovo_assembly']
458 #except 408 #except
459 no 409 no
460 #end try 410 #end try
461 #end if 411 #end if
462 probability_threshold = #slurp 412 probability_threshold = #slurp
463 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.probability_threshold.__str__ != "" 413 #if $defuse_param.settings == "full" and $defuse_param.probability_threshold.__str__ != ""
464 $refGenomeSource.defuse_param.probability_threshold 414 $defuse_param.probability_threshold
465 #else 415 #else
466 #try 416 #try
467 $ref_dict['probability_threshold'] 417 $ref_dict['probability_threshold']
468 #except 418 #except
469 0.50 419 0.50
471 #end if 421 #end if
472 positive_controls = \$(data_directory)/controls.txt 422 positive_controls = \$(data_directory)/controls.txt
473 423
474 # Position density when calculating covariance 424 # Position density when calculating covariance
475 covariance_sampling_density = #slurp 425 covariance_sampling_density = #slurp
476 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.covariance_sampling_density.__str__ != "" 426 #if $defuse_param.settings == "full" and $defuse_param.covariance_sampling_density.__str__ != ""
477 $refGenomeSource.defuse_param.covariance_sampling_density 427 $defuse_param.covariance_sampling_density
478 #else 428 #else
479 #try 429 #try
480 $ref_dict['covariance_sampling_density'] 430 $ref_dict['covariance_sampling_density']
481 #except 431 #except
482 0.01 432 0.01
483 #end try 433 #end try
484 #end if 434 #end if
485
486
487 # Number of reads for each job in split 435 # Number of reads for each job in split
488 reads_per_job = 1000000 436 reads_per_job = #slurp
489 437 #if $defuse_param.settings == "full" and $defuse_param.reads_per_job.__str__ != ""
490 # Number of regions for each breakpoint sequence job in split 438 $defuse_param.reads_per_job
491 regions_per_job = 20 439 #else
440 #try
441 $ref_dict['reads_per_job']
442 #except
443 1000000
444 #end try
445 #end if
492 446
493 #raw 447 #raw
494 # If you have command line 'mail' and wish to be notified 448 # If you have command line 'mail' and wish to be notified
495 # mailto = andrew.mcpherson@gmail.com 449 # mailto = andrew.mcpherson@gmail.com
496 450
497 # Remove temp files 451 # Remove temp files
498 remove_job_files = yes 452 remove_job_files = yes
499 remove_job_temp_files = yes 453 remove_job_temp_files = yes
500 454
501 # Converting to fastq
502 # Fastq converter config format 1 for reads stored in separate files for each end
503 # data_lane_rexex_N is a perl regex which stores the lane id in $1
504 # data_end_regex_N is a perl regex which stores the end, 1 or 2, in $1
505 # data_compress_regex_N is a perl regex which stores the compression extension in $1
506 # data_convert_N is the associated conversion utility that takes data at stdin and outputs fastq at stdout
507 # Fastq converter config format 2 for reads stored in separate files for each end
508 # data_lane_regex_N is a perl regex which stores the lane id in $1
509 # data_compress_regex_N is a perl regex which stores the compression extension in $1
510 # data_end1_converter_N is the associated conversion utility that takes data at stdin and outputs fastq for end 1 at stdout
511 # data_end2_converter_N is the associated conversion utility that takes data at stdin and outputs fastq for end 2 at stdout
512
513 data_lane_regex_1 = ^(.+)_[12]_export\.txt.*$
514 data_end_regex_1 = ^.+_([12])_export\.txt.*$
515 data_compress_regex_1 = ^.+_[12]_export\.txt(.*)$
516 data_converter_1 = $(scripts_directory)/fq_all2std.pl export2std
517
518 data_lane_regex_2 = ^(.+)_[12]_concat_qseq\.txt.*$
519 data_end_regex_2 = ^.+_([12])_concat_qseq\.txt.*$
520 data_compress_regex_2 = ^.+_[12]_concat_qseq\.txt(.*)$
521 data_converter_2 = $(scripts_directory)/qseq2fastq.pl
522
523 data_lane_regex_3 = ^(.+)\.bam.*$
524 data_compress_regex_3 = ^.+\.bam(.*)$
525 data_end1_converter_3 = samtools view - | filter_sam_mate.pl 1 | sam_to_fastq.pl
526 data_end2_converter_3 = samtools view - | filter_sam_mate.pl 2 | sam_to_fastq.pl
527
528 data_lane_regex_4 = ^(.+).[12].fastq.*$
529 data_end_regex_4 = ^.+.([12]).fastq.*$
530 data_compress_regex_4 = ^.+.[12].fastq(.*)$
531 data_converter_4 = cat
532 #end raw 455 #end raw
533 456
534 #end if
535 457
536 </configfile> 458 </configfile>
537 <configfile name="shscript"> 459 <configfile name="shscript">
538 #!/bin/bash 460 #!/bin/bash
539 ## define some things for cheetah processing 461 ## define some things for cheetah processing
600 #end if 522 #end if
601 ## run defuse.pl 523 ## run defuse.pl
602 perl \${DEFUSE_PATH}/scripts/defuse.pl -c $defuse_config -1 data_dir/reads_1.fastq -2 data_dir/reads_2.fastq -o output_dir -p 8 524 perl \${DEFUSE_PATH}/scripts/defuse.pl -c $defuse_config -1 data_dir/reads_1.fastq -2 data_dir/reads_2.fastq -o output_dir -p 8
603 ## copy primary results to output datasets 525 ## copy primary results to output datasets
604 if [ -e output_dir/log/defuse.log ]; then cp output_dir/log/defuse.log $defuse_log; fi 526 if [ -e output_dir/log/defuse.log ]; then cp output_dir/log/defuse.log $defuse_log; fi
605 if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi 527 ## if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi
606 if [ -e output_dir/results.filtered.tsv ]; then cp output_dir/results.filtered.tsv $results_filtered_tsv; fi 528 if [ -e output_dir/results.filtered.tsv ]; then cp output_dir/results.filtered.tsv $results_filtered_tsv; fi
607 if [ -e output_dir/results.classify.tsv ]; then cp output_dir/results.classify.tsv $results_classify_tsv; fi 529 if [ -e output_dir/results.classify.tsv ]; then cp output_dir/results.classify.tsv $results_classify_tsv; fi
608 ## create html with links for output_dir 530 ## create html with links for output_dir
609 #if $defuse_out.__str__ != 'None': 531 #if $defuse_out.__str__ != 'None':
610 if [ -e $defuse_out ] 532 if [ -e $defuse_out ]
648 570
649 DeFuse requires 2 fastq files for paired reads, one with the left mate of the paired reads, and a second fastq with the right mate of the paired reads (**with reads in the same order as in the first fastq dataset**). 571 DeFuse requires 2 fastq files for paired reads, one with the left mate of the paired reads, and a second fastq with the right mate of the paired reads (**with reads in the same order as in the first fastq dataset**).
650 572
651 If your fastq files have reads in different orders or include unpaired reads, you can preprocess them with **FASTQ interlacer** to create a single interlaced fastq dataset with only the paired reads and input that to **FASTQ de-interlacer** to separate the reads into a left fastq and right fastq. 573 If your fastq files have reads in different orders or include unpaired reads, you can preprocess them with **FASTQ interlacer** to create a single interlaced fastq dataset with only the paired reads and input that to **FASTQ de-interlacer** to separate the reads into a left fastq and right fastq.
652 574
653 DeFuse uses a Reference Dataset to search for gene fusions. The Reference Dataset is generated from the following sources in DeFuse_Version_0.6_: 575 DeFuse uses a Reference Dataset to search for gene fusions. The Reference Dataset is generated from the following sources in DeFuse_Version_0.4_:
654 - genome_fasta from Ensembl 576 - genome_fasta from Ensembl
655 - gene_models from Ensembl 577 - gene_models from Ensembl
656 - repeats_filename from UCSC RepeatMasker rmsk.txt 578 - repeats_filename from UCSC RepeatMasker rmsk.txt
657 - est_fasta from UCSC 579 - est_fasta from UCSC
658 - est_alignments from UCSC intronEst.txt 580 - est_alignments from UCSC intronEst.txt
659 - unigene_fasta from NCBI 581 - unigene_fasta from NCBI
660 582
661 .. _DeFuse_Version_0.6: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.6.1 583 .. _DeFuse_Version_0.4: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.2
662 584
663 ------ 585 ------
664 586
665 **Outputs** 587 **Outputs**
666 588