Mercurial > repos > jjohnson > defuse
comparison defuse.xml @ 19:1af6f32ff592
Add datamanager, move to defuse_reference.loc
| author | Jim Johnson <jj@umn.edu> |
|---|---|
| date | Fri, 21 Jun 2013 14:46:11 -0500 |
| parents | 547d8db4673e |
| children | 68494d6aabeb |
comparison
equal
deleted
inserted
replaced
| 18:547d8db4673e | 19:1af6f32ff592 |
|---|---|
| 10 <command interpreter="command"> /bin/bash $shscript </command> | 10 <command interpreter="command"> /bin/bash $shscript </command> |
| 11 <inputs> | 11 <inputs> |
| 12 <param name="left_pairendreads" type="data" format="fastq" label="left part of read pairs" help="The left and right reads pairs must be in the same order, and not have any unpaired reads. (FASTQ interlacer will pair reads and remove the unpaired. FASTQ de-interlacer will separate the result into left and right reads.)"/> | 12 <param name="left_pairendreads" type="data" format="fastq" label="left part of read pairs" help="The left and right reads pairs must be in the same order, and not have any unpaired reads. (FASTQ interlacer will pair reads and remove the unpaired. FASTQ de-interlacer will separate the result into left and right reads.)"/> |
| 13 <param name="right_pairendreads" type="data" format="fastq" label="right part of read pairs" help="In the same order as the left reads"/> | 13 <param name="right_pairendreads" type="data" format="fastq" label="right part of read pairs" help="In the same order as the left reads"/> |
| 14 <conditional name="refGenomeSource"> | 14 <conditional name="refGenomeSource"> |
| 15 <param name="genomeSource" type="select" label="Will you select a built-in DeFuse Reference Dataset, or supply a configuration from your history" help=""> | 15 <param name="genomeSource" type="select" label="Will you select a built-in DeFuse Reference Dataset, or supply a configuration from your history" help=""> |
| 16 <option value="indexed">Use a built-in DeFuse Reference Dataset</option> | 16 <option value="indexed">Use a built-in DeFuse Reference Dataset</option> |
| 17 <option value="history">Use a configuration from your history that specifies the DeFuse Reference Dataset</option> | 17 <option value="history">Use a configuration from your history that specifies the DeFuse Reference Dataset</option> |
| 18 </param> | |
| 19 <when value="indexed"> | |
| 20 <param name="index" type="select" label="Select a Reference Dataset" help="if your genome of interest is not listed - contact Galaxy team"> | |
| 21 <options from_file="defuse_reference.loc"> | |
| 22 <column name="name" index="1"/> | |
| 23 <column name="value" index="2"/> | |
| 24 <filter type="sort_by" column="0" /> | |
| 25 <validator type="no_options" message="No indexes are available" /> | |
| 26 </options> | |
| 18 </param> | 27 </param> |
| 19 <when value="indexed"> | 28 </when> |
| 20 <param name="index" type="select" label="Select a Reference Dataset" help="if your genome of interest is not listed - contact Galaxy team"> | 29 <when value="history"> |
| 21 <options from_file="defuse.loc"> | 30 <param name="config" type="data" format="defuse.conf" label="Defuse Config file" help=""/> |
| 22 <column name="name" index="1"/> | 31 </when> <!-- history --> |
| 23 <column name="value" index="2"/> | |
| 24 <filter type="sort_by" column="0" /> | |
| 25 <validator type="no_options" message="No indexes are available" /> | |
| 26 </options> | |
| 27 </param> | |
| 28 <conditional name="defuse_param"> | |
| 29 <param name="settings" type="select" label="Defuse parameter settings" help=""> | |
| 30 <option value="preSet">Default settings</option> | |
| 31 <option value="full">Full parameter list</option> | |
| 32 </param> | |
| 33 <when value="preSet" /> | |
| 34 <when value="full"> | |
| 35 <param name="max_insert_size" type="integer" value="500" optional="true" label="Bowtie max_insert_size" /> | |
| 36 <param name="dna_concordant_length" type="integer" value="2000" optional="true" label="Minimum gene fusion range dna_concordant_length" /> | |
| 37 <param name="discord_read_trim" type="integer" value="50" optional="true" label="Trim length for discordant reads discord_read_trim" help="(split reads are not trimmed)" /> | |
| 38 <param name="clustering_precision" type="float" value=".95" optional="true" label="Filter clustering_precision"> | |
| 39 <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/> | |
| 40 </param> | |
| 41 <param name="span_count_threshold" type="integer" value="5" optional="true" label="Filter span_count_threshold" /> | |
| 42 <param name="split_count_threshold" type="integer" value="3" optional="true" label="Filter split_count_threshold" /> | |
| 43 <param name="percent_identity_threshold" type="float" value=".90" optional="true" label="Filter percent_identity_threshold"> | |
| 44 <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/> | |
| 45 </param> | |
| 46 <param name="max_dist_pos" type="integer" value="600" optional="true" label="Filter max_dist_pos" /> | |
| 47 <param name="num_dist_genes" type="integer" value="500" optional="true" label="Filter num_dist_genes" /> | |
| 48 <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" /> | |
| 49 <param name="max_concordant_ratio" type="float" value="0.1" optional="true" label="Filter max_concordant_ratio"> | |
| 50 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> | |
| 51 </param> | |
| 52 <param name="splice_bias" type="integer" value="10" optional="true" label="Filter splice_bias" /> | |
| 53 <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold"> | |
| 54 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> | |
| 55 </param> | |
| 56 <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density"> | |
| 57 <help>Position density when calculating covariance</help> | |
| 58 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> | |
| 59 </param> | |
| 60 <param name="denovo_assembly" type="select" label="denovo_assembly" help=""> | |
| 61 <option value="">Use Default</option> | |
| 62 <option value="no">no</option> | |
| 63 <option value="yes">yes</option> | |
| 64 </param> | |
| 65 <!-- | |
| 66 <param name="positive_controls" type="data" format="txt" optional=true label="Defuse positive_controls" help=""/> | |
| 67 --> | |
| 68 </when> <!-- full --> | |
| 69 </conditional> <!-- defuse_param --> | |
| 70 </when> | |
| 71 <when value="history"> | |
| 72 <param name="config" type="data" format="txt" label="Defuse Config file" help=""/> | |
| 73 </when> <!-- history --> | |
| 74 </conditional> <!-- refGenomeSource --> | 32 </conditional> <!-- refGenomeSource --> |
| 33 <conditional name="defuse_param"> | |
| 34 <param name="settings" type="select" label="Defuse parameter settings" help=""> | |
| 35 <option value="preSet">Default settings</option> | |
| 36 <option value="full">Full parameter list</option> | |
| 37 </param> | |
| 38 <when value="preSet" /> | |
| 39 <when value="full"> | |
| 40 <param name="max_insert_size" type="integer" value="500" optional="true" label="Bowtie max_insert_size" /> | |
| 41 <param name="dna_concordant_length" type="integer" value="2000" optional="true" label="Minimum gene fusion range dna_concordant_length" /> | |
| 42 <param name="discord_read_trim" type="integer" value="50" optional="true" label="Trim length for discordant reads discord_read_trim" help="(split reads are not trimmed)" /> | |
| 43 <param name="calculate_extra_annotations" type="select" label="Calculate extra annotations, fusion splice index and interrupted index" help=""> | |
| 44 <option value="">Use Default</option> | |
| 45 <option value="no">no</option> | |
| 46 <option value="yes">yes</option> | |
| 47 </param> | |
| 48 <param name="clustering_precision" type="float" value=".95" optional="true" label="Filter clustering_precision"> | |
| 49 <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/> | |
| 50 </param> | |
| 51 <param name="span_count_threshold" type="integer" value="5" optional="true" label="Filter span_count_threshold" /> | |
| 52 <param name="percent_identity_threshold" type="float" value=".90" optional="true" label="Filter percent_identity_threshold"> | |
| 53 <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/> | |
| 54 </param> | |
| 55 <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" /> | |
| 56 <param name="splice_bias" type="integer" value="10" optional="true" label="Filter splice_bias" /> | |
| 57 <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold"> | |
| 58 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> | |
| 59 </param> | |
| 60 <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density"> | |
| 61 <help>Position density when calculating covariance</help> | |
| 62 <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/> | |
| 63 </param> | |
| 64 <param name="denovo_assembly" type="select" label="denovo_assembly" help=""> | |
| 65 <option value="">Use Default</option> | |
| 66 <option value="no">no</option> | |
| 67 <option value="yes">yes</option> | |
| 68 </param> | |
| 69 <!-- | |
| 70 <param name="positive_controls" type="data" format="txt" optional=true label="Defuse positive_controls" help=""/> | |
| 71 --> | |
| 72 <param name="reads_per_job" type="integer" value="1000000" optional="true" label="Number of reads for each job in split" /> | |
| 73 </when> <!-- full --> | |
| 74 </conditional> <!-- defuse_param --> | |
| 75 <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files" | 75 <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files" |
| 76 help="The defuse output working directory can be helpful for determining errors that may have occurred during the run, | 76 help="The defuse output working directory can be helpful for determining errors that may have occurred during the run, |
| 77 but they require considerable diskspace, and should be deleted and purged when no longer needed."/> | 77 but they require considerable diskspace, and should be deleted and purged when no longer needed."/> |
| 78 <param name="do_get_reads" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Run get_reads on each cluster"/> | 78 <param name="do_get_reads" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Run get_reads on each cluster"/> |
| 79 </inputs> | 79 </inputs> |
| 81 <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/> | 81 <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/> |
| 82 <data format="txt" name="defuse_log" label="${tool.name} on ${on_string}: defuse.log" /> | 82 <data format="txt" name="defuse_log" label="${tool.name} on ${on_string}: defuse.log" /> |
| 83 <data format="html" name="defuse_out" label="${tool.name} on ${on_string}: defuse_output (purge when no longer needed)"> | 83 <data format="html" name="defuse_out" label="${tool.name} on ${on_string}: defuse_output (purge when no longer needed)"> |
| 84 <filter>keep_output == True</filter> | 84 <filter>keep_output == True</filter> |
| 85 </data> | 85 </data> |
| 86 <data format="tabular" name="results_tsv" label="${tool.name} on ${on_string}: results.tsv" /> | |
| 87 <data format="tabular" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" /> | 86 <data format="tabular" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" /> |
| 88 <data format="tabular" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" /> | 87 <data format="tabular" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" /> |
| 89 <data format="html" name="fusion_reads" label="${tool.name} on ${on_string}: fusion_reads"> | 88 <data format="html" name="fusion_reads" label="${tool.name} on ${on_string}: fusion_reads"> |
| 90 <filter>do_get_reads == True</filter> | 89 <filter>do_get_reads == True</filter> |
| 91 </data> | 90 </data> |
| 91 <!-- | |
| 92 expression_plot | |
| 93 circos plot | |
| 94 --> | |
| 92 </outputs> | 95 </outputs> |
| 93 <configfiles> | 96 <configfiles> |
| 94 <configfile name="defuse_config"> | 97 <configfile name="defuse_config"> |
| 95 #import ast | 98 #import re |
| 99 #set $ds = chr(36) | |
| 96 #if $refGenomeSource.genomeSource == "history": | 100 #if $refGenomeSource.genomeSource == "history": |
| 97 #include raw $refGenomeSource.config.__str__ | 101 #set config_file = $refGenomeSource.config.__str__ |
| 102 #set | |
| 98 #else | 103 #else |
| 99 #set $ref_dict = dict($ast.literal_eval($refGenomeSource.index.value)) | 104 #set config_file = $refGenomeSource.index.value |
| 105 #end if | |
| 106 #set pat = '^\s*([^#=][^=]*?)\s*=\s*(.*?)\s*$' | |
| 107 #set fh = open() | |
| 108 #set keys = ['dataset_directory','ensembl_organism','ensembl_prefix','ensembl_version','ensembl_genome_version','ucsc_genome_version','ncbi_organism','ncbi_prefix','chromosomes','mt_chromosome','gene_sources','ig_gene_sources','rrna_gene_sources'] | |
| 109 #set kv = [] | |
| 110 #for $line in $fh: | |
| 111 #set m = $re.match($pat,$line) | |
| 112 #if $m and len($m.groups()) == 2: | |
| 113 ## #echo $line | |
| 114 #if $m.groups()[0] in keys: | |
| 115 #set k = $m.groups()[0] | |
| 116 #if k == 'dataset_directory' and $refGenomeSource.genomeSource == "indexed": | |
| 117 ## The DataManager is configured to place the config file in the same directory as the defuse_data: dataset_directory | |
| 118 #set v = $os.path.dirname($config_file) | |
| 119 #else: | |
| 120 #set v = $m.groups()[1] | |
| 121 #end if | |
| 122 #set kv = $kv + [[$k, $v]] | |
| 123 #end if | |
| 124 #end if | |
| 125 #end for | |
| 126 ## #echo $kv | |
| 127 #set ref_dict = dict($kv) | |
| 128 ## #echo $ref_dict | |
| 129 ## include raw $refGenomeSource.config.__str__ | |
| 100 # | 130 # |
| 101 # Configuration file for defuse | 131 # Configuration file for defuse |
| 102 # | 132 # |
| 103 # At a minimum, change all values enclosed by [] | 133 # At a minimum, change all values enclosed by [] |
| 104 # | 134 # |
| 105 | 135 |
| 106 # Directory where the defuse code was unpacked | 136 # Directory where the defuse code was unpacked |
| 107 ## Default location in the tool/defuse directory | 137 ## Default location in the tool/defuse directory |
| 108 # source_directory = ${__root_dir__}/tools/defuse | 138 # source_directory = ${__root_dir__}/tools/defuse |
| 109 source_directory = #slurp | 139 source_directory = __DEFUSE_PATH__ |
| 110 #try | |
| 111 $ref_dict['source_directory'] | |
| 112 #except | |
| 113 __DEFUSE_PATH__ | |
| 114 #end try | |
| 115 | 140 |
| 116 # Directory where you want your dataset | 141 # Directory where you want your dataset |
| 117 dataset_directory = #slurp | 142 dataset_directory = #slurp |
| 118 #try | 143 #try |
| 119 $ref_dict['dataset_directory'] | 144 $ref_dict['dataset_directory'] |
| 164 #except | 189 #except |
| 165 \$(dataset_directory)/Hs.seq.uniq | 190 \$(dataset_directory)/Hs.seq.uniq |
| 166 #end try | 191 #end try |
| 167 | 192 |
| 168 # Paths to external tools | 193 # Paths to external tools |
| 169 bowtie_bin = #slurp | 194 bowtie_bin = __BOWTIE_BIN__ |
| 170 #try | 195 bowtie_build_bin = __BOWTIE_BUILD_BIN__ |
| 171 $ref_dict['bowtie_bin'] | 196 blat_bin = __BLAT_BIN__ |
| 172 #except | 197 fatotwobit_bin = __FATOTWOBIT_BIN__ |
| 173 __BOWTIE_BIN__ | 198 gmap_bin = __GMAP_BIN__ |
| 174 #end try | 199 gmap_bin = __GMAP_BIN__ |
| 175 bowtie_build_bin = #slurp | 200 gmap_setup_bin = __GMAP_SETUP_BIN__ |
| 176 #try | 201 r_bin = __R_BIN__ |
| 177 $ref_dict['bowtie_build_bin'] | 202 rscript_bin = __RSCRIPT_BIN__ |
| 178 #except | |
| 179 __BOWTIE_BUILD_BIN__ | |
| 180 #end try | |
| 181 blat_bin = #slurp | |
| 182 #try | |
| 183 $ref_dict['blat_bin'] | |
| 184 #except | |
| 185 __BLAT_BIN__ | |
| 186 #end try | |
| 187 fatotwobit_bin = #slurp | |
| 188 #try | |
| 189 $ref_dict['fatotwobit_bin'] | |
| 190 #except | |
| 191 __FATOTWOBIT_BIN__ | |
| 192 #end try | |
| 193 gmap_bin = #slurp | |
| 194 #try | |
| 195 $ref_dict['gmap_bin'] | |
| 196 #except | |
| 197 __GMAP_BIN__ | |
| 198 #end try | |
| 199 gmap_bin = #slurp | |
| 200 #try | |
| 201 $ref_dict['gmap_bin'] | |
| 202 #except | |
| 203 __GMAP_BIN__ | |
| 204 #end try | |
| 205 gmap_setup_bin = #slurp | |
| 206 #try | |
| 207 $ref_dict['gmap_setup_bin'] | |
| 208 #except | |
| 209 __GMAP_SETUP_BIN__ | |
| 210 #end try | |
| 211 r_bin = #slurp | |
| 212 #try | |
| 213 $ref_dict['r_bin'] | |
| 214 #except | |
| 215 __R_BIN__ | |
| 216 #end try | |
| 217 rscript_bin = #slurp | |
| 218 #try | |
| 219 $ref_dict['rscript_bin'] | |
| 220 #except | |
| 221 __RSCRIPT_BIN__ | |
| 222 #end try | |
| 223 | 203 |
| 224 # Directory where you want your dataset | 204 # Directory where you want your dataset |
| 225 gmap_index_directory = #slurp | 205 gmap_index_directory = #slurp |
| 226 #try | 206 #try |
| 227 $ref_dict['gmap_index_directory'] | 207 $ref_dict['gmap_index_directory'] |
| 282 $ref_dict['bowtie_quals'] | 262 $ref_dict['bowtie_quals'] |
| 283 #except | 263 #except |
| 284 --phred33-quals | 264 --phred33-quals |
| 285 #end try | 265 #end try |
| 286 max_insert_size = #slurp | 266 max_insert_size = #slurp |
| 287 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_insert_size.__str__ != "": | 267 #if $defuse_param.settings == "full" and $defuse_param.max_insert_size.__str__ != "": |
| 288 $refGenomeSource.defuse_param.max_insert_size | 268 $defuse_param.max_insert_size |
| 289 #else | 269 #else |
| 290 #try | 270 #try |
| 291 $ref_dict['max_insert_size'] | 271 $ref_dict['max_insert_size'] |
| 292 #except | 272 #except |
| 293 500 | 273 500 |
| 334 10000 | 314 10000 |
| 335 #end try | 315 #end try |
| 336 | 316 |
| 337 # Minimum gene fusion range | 317 # Minimum gene fusion range |
| 338 dna_concordant_length = #slurp | 318 dna_concordant_length = #slurp |
| 339 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.dna_concordant_length.__str__ != "": | 319 #if $defuse_param.settings == "full" and $defuse_param.dna_concordant_length.__str__ != "": |
| 340 $refGenomeSource.defuse_param.dna_concordant_length | 320 $defuse_param.dna_concordant_length |
| 341 #else | 321 #else |
| 342 #try | 322 #try |
| 343 $ref_dict['dna_concordant_length'] | 323 $ref_dict['dna_concordant_length'] |
| 344 #except | 324 #except |
| 345 2000 | 325 2000 |
| 346 #end try | 326 #end try |
| 347 #end if | 327 #end if |
| 348 | 328 |
| 349 # Trim length for discordant reads (split reads are not trimmed) | 329 # Trim length for discordant reads (split reads are not trimmed) |
| 350 discord_read_trim = #slurp | 330 discord_read_trim = #slurp |
| 351 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.discord_read_trim.__str__ != "": | 331 #if $defuse_param.settings == "full" and $defuse_param.discord_read_trim.__str__ != "": |
| 352 $refGenomeSource.defuse_param.discord_read_trim | 332 $defuse_param.discord_read_trim |
| 353 #else | 333 #else |
| 354 #try | 334 #try |
| 355 $ref_dict['discord_read_trim'] | 335 $ref_dict['discord_read_trim'] |
| 356 #except | 336 #except |
| 357 50 | 337 50 |
| 358 #end try | 338 #end try |
| 359 #end if | 339 #end if |
| 360 | 340 # Calculate extra annotations, fusion splice index and interrupted index |
| 341 calculate_extra_annotations = #slurp | |
| 342 #if $defuse_param.settings == "full" and $defuse_param.calculate_extra_annotations.__str__ != "": | |
| 343 $defuse_param.calculate_extra_annotations | |
| 344 #else | |
| 345 #try | |
| 346 $ref_dict['calculate_extra_annotations'] | |
| 347 #except | |
| 348 no | |
| 349 #end try | |
| 350 #end if | |
| 361 # Filtering parameters | 351 # Filtering parameters |
| 362 clustering_precision = #slurp | 352 clustering_precision = #slurp |
| 363 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.clustering_precision.__str__ != "" | 353 #if $defuse_param.settings == "full" and $defuse_param.clustering_precision.__str__ != "" |
| 364 $refGenomeSource.defuse_param.clustering_precision | 354 $defuse_param.clustering_precision |
| 365 #else | 355 #else |
| 366 #try | 356 #try |
| 367 $ref_dict['clustering_precision'] | 357 $ref_dict['clustering_precision'] |
| 368 #except | 358 #except |
| 369 0.95 | 359 0.95 |
| 370 #end try | 360 #end try |
| 371 #end if | 361 #end if |
| 372 span_count_threshold = #slurp | 362 span_count_threshold = #slurp |
| 373 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.span_count_threshold.__str__ != "" | 363 #if $defuse_param.settings == "full" and $defuse_param.span_count_threshold.__str__ != "" |
| 374 $refGenomeSource.defuse_param.span_count_threshold | 364 $defuse_param.span_count_threshold |
| 375 #else | 365 #else |
| 376 #try | 366 #try |
| 377 $ref_dict['span_count_threshold'] | 367 $ref_dict['span_count_threshold'] |
| 378 #except | 368 #except |
| 379 5 | 369 5 |
| 380 #end try | 370 #end try |
| 381 #end if | 371 #end if |
| 382 split_count_threshold = #slurp | |
| 383 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.split_count_threshold.__str__ != "" | |
| 384 $refGenomeSource.defuse_param.split_count_threshold | |
| 385 #else | |
| 386 #try | |
| 387 $ref_dict['split_count_threshold'] | |
| 388 #except | |
| 389 3 | |
| 390 #end try | |
| 391 #end if | |
| 392 percent_identity_threshold = #slurp | 372 percent_identity_threshold = #slurp |
| 393 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.percent_identity_threshold.__str__ != "" | 373 #if $defuse_param.settings == "full" and $defuse_param.percent_identity_threshold.__str__ != "" |
| 394 $refGenomeSource.defuse_param.percent_identity_threshold | 374 $defuse_param.percent_identity_threshold |
| 395 #else | 375 #else |
| 396 #try | 376 #try |
| 397 $ref_dict['percent_identity_threshold'] | 377 $ref_dict['percent_identity_threshold'] |
| 398 #except | 378 #except |
| 399 0.90 | 379 0.90 |
| 400 #end try | 380 #end try |
| 401 #end if | 381 #end if |
| 402 max_dist_pos = #slurp | |
| 403 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_dist_pos.__str__ != "" | |
| 404 $refGenomeSource.defuse_param.max_dist_pos | |
| 405 #else | |
| 406 #try | |
| 407 $ref_dict['max_dist_pos'] | |
| 408 #except | |
| 409 600 | |
| 410 #end try | |
| 411 #end if | |
| 412 num_dist_genes = #slurp | |
| 413 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.num_dist_genes.__str__ != "" | |
| 414 $refGenomeSource.defuse_param.num_dist_genes | |
| 415 #else | |
| 416 #try | |
| 417 $ref_dict['num_dist_genes'] | |
| 418 #except | |
| 419 500 | |
| 420 #end try | |
| 421 #end if | |
| 422 split_min_anchor = #slurp | 382 split_min_anchor = #slurp |
| 423 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.split_min_anchor.__str__ != "" | 383 #if $defuse_param.settings == "full" and $defuse_param.split_min_anchor.__str__ != "" |
| 424 $refGenomeSource.defuse_param.split_min_anchor | 384 $defuse_param.split_min_anchor |
| 425 #else | 385 #else |
| 426 #try | 386 #try |
| 427 $ref_dict['split_min_anchor'] | 387 $ref_dict['split_min_anchor'] |
| 428 #except | 388 #except |
| 429 4 | 389 4 |
| 430 #end try | 390 #end try |
| 431 #end if | 391 #end if |
| 432 max_concordant_ratio = #slurp | |
| 433 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_concordant_ratio.__str__ != "" | |
| 434 $refGenomeSource.defuse_param.max_concordant_ratio | |
| 435 #else | |
| 436 #try | |
| 437 $ref_dict['max_concordant_ratio'] | |
| 438 #except | |
| 439 0.1 | |
| 440 #end try | |
| 441 #end if | |
| 442 splice_bias = #slurp | 392 splice_bias = #slurp |
| 443 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.splice_bias.__str__ != "" | 393 #if $defuse_param.settings == "full" and $defuse_param.splice_bias.__str__ != "" |
| 444 $refGenomeSource.defuse_param.splice_bias | 394 $defuse_param.splice_bias |
| 445 #else | 395 #else |
| 446 #try | 396 #try |
| 447 $ref_dict['splice_bias'] | 397 $ref_dict['splice_bias'] |
| 448 #except | 398 #except |
| 449 10 | 399 10 |
| 450 #end try | 400 #end try |
| 451 #end if | 401 #end if |
| 452 denovo_assembly = #slurp | 402 denovo_assembly = #slurp |
| 453 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.denovo_assembly.__str__ != "" | 403 #if $defuse_param.settings == "full" and $defuse_param.denovo_assembly.__str__ != "" |
| 454 $refGenomeSource.defuse_param.denovo_assembly | 404 $defuse_param.denovo_assembly |
| 455 #else | 405 #else |
| 456 #try | 406 #try |
| 457 $ref_dict['denovo_assembly'] | 407 $ref_dict['denovo_assembly'] |
| 458 #except | 408 #except |
| 459 no | 409 no |
| 460 #end try | 410 #end try |
| 461 #end if | 411 #end if |
| 462 probability_threshold = #slurp | 412 probability_threshold = #slurp |
| 463 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.probability_threshold.__str__ != "" | 413 #if $defuse_param.settings == "full" and $defuse_param.probability_threshold.__str__ != "" |
| 464 $refGenomeSource.defuse_param.probability_threshold | 414 $defuse_param.probability_threshold |
| 465 #else | 415 #else |
| 466 #try | 416 #try |
| 467 $ref_dict['probability_threshold'] | 417 $ref_dict['probability_threshold'] |
| 468 #except | 418 #except |
| 469 0.50 | 419 0.50 |
| 471 #end if | 421 #end if |
| 472 positive_controls = \$(data_directory)/controls.txt | 422 positive_controls = \$(data_directory)/controls.txt |
| 473 | 423 |
| 474 # Position density when calculating covariance | 424 # Position density when calculating covariance |
| 475 covariance_sampling_density = #slurp | 425 covariance_sampling_density = #slurp |
| 476 #if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.covariance_sampling_density.__str__ != "" | 426 #if $defuse_param.settings == "full" and $defuse_param.covariance_sampling_density.__str__ != "" |
| 477 $refGenomeSource.defuse_param.covariance_sampling_density | 427 $defuse_param.covariance_sampling_density |
| 478 #else | 428 #else |
| 479 #try | 429 #try |
| 480 $ref_dict['covariance_sampling_density'] | 430 $ref_dict['covariance_sampling_density'] |
| 481 #except | 431 #except |
| 482 0.01 | 432 0.01 |
| 483 #end try | 433 #end try |
| 484 #end if | 434 #end if |
| 485 | |
| 486 | |
| 487 # Number of reads for each job in split | 435 # Number of reads for each job in split |
| 488 reads_per_job = 1000000 | 436 reads_per_job = #slurp |
| 489 | 437 #if $defuse_param.settings == "full" and $defuse_param.reads_per_job.__str__ != "" |
| 490 # Number of regions for each breakpoint sequence job in split | 438 $defuse_param.reads_per_job |
| 491 regions_per_job = 20 | 439 #else |
| 440 #try | |
| 441 $ref_dict['reads_per_job'] | |
| 442 #except | |
| 443 1000000 | |
| 444 #end try | |
| 445 #end if | |
| 492 | 446 |
| 493 #raw | 447 #raw |
| 494 # If you have command line 'mail' and wish to be notified | 448 # If you have command line 'mail' and wish to be notified |
| 495 # mailto = andrew.mcpherson@gmail.com | 449 # mailto = andrew.mcpherson@gmail.com |
| 496 | 450 |
| 497 # Remove temp files | 451 # Remove temp files |
| 498 remove_job_files = yes | 452 remove_job_files = yes |
| 499 remove_job_temp_files = yes | 453 remove_job_temp_files = yes |
| 500 | 454 |
| 501 # Converting to fastq | |
| 502 # Fastq converter config format 1 for reads stored in separate files for each end | |
| 503 # data_lane_rexex_N is a perl regex which stores the lane id in $1 | |
| 504 # data_end_regex_N is a perl regex which stores the end, 1 or 2, in $1 | |
| 505 # data_compress_regex_N is a perl regex which stores the compression extension in $1 | |
| 506 # data_convert_N is the associated conversion utility that takes data at stdin and outputs fastq at stdout | |
| 507 # Fastq converter config format 2 for reads stored in separate files for each end | |
| 508 # data_lane_regex_N is a perl regex which stores the lane id in $1 | |
| 509 # data_compress_regex_N is a perl regex which stores the compression extension in $1 | |
| 510 # data_end1_converter_N is the associated conversion utility that takes data at stdin and outputs fastq for end 1 at stdout | |
| 511 # data_end2_converter_N is the associated conversion utility that takes data at stdin and outputs fastq for end 2 at stdout | |
| 512 | |
| 513 data_lane_regex_1 = ^(.+)_[12]_export\.txt.*$ | |
| 514 data_end_regex_1 = ^.+_([12])_export\.txt.*$ | |
| 515 data_compress_regex_1 = ^.+_[12]_export\.txt(.*)$ | |
| 516 data_converter_1 = $(scripts_directory)/fq_all2std.pl export2std | |
| 517 | |
| 518 data_lane_regex_2 = ^(.+)_[12]_concat_qseq\.txt.*$ | |
| 519 data_end_regex_2 = ^.+_([12])_concat_qseq\.txt.*$ | |
| 520 data_compress_regex_2 = ^.+_[12]_concat_qseq\.txt(.*)$ | |
| 521 data_converter_2 = $(scripts_directory)/qseq2fastq.pl | |
| 522 | |
| 523 data_lane_regex_3 = ^(.+)\.bam.*$ | |
| 524 data_compress_regex_3 = ^.+\.bam(.*)$ | |
| 525 data_end1_converter_3 = samtools view - | filter_sam_mate.pl 1 | sam_to_fastq.pl | |
| 526 data_end2_converter_3 = samtools view - | filter_sam_mate.pl 2 | sam_to_fastq.pl | |
| 527 | |
| 528 data_lane_regex_4 = ^(.+).[12].fastq.*$ | |
| 529 data_end_regex_4 = ^.+.([12]).fastq.*$ | |
| 530 data_compress_regex_4 = ^.+.[12].fastq(.*)$ | |
| 531 data_converter_4 = cat | |
| 532 #end raw | 455 #end raw |
| 533 | 456 |
| 534 #end if | |
| 535 | 457 |
| 536 </configfile> | 458 </configfile> |
| 537 <configfile name="shscript"> | 459 <configfile name="shscript"> |
| 538 #!/bin/bash | 460 #!/bin/bash |
| 539 ## define some things for cheetah processing | 461 ## define some things for cheetah processing |
| 600 #end if | 522 #end if |
| 601 ## run defuse.pl | 523 ## run defuse.pl |
| 602 perl \${DEFUSE_PATH}/scripts/defuse.pl -c $defuse_config -1 data_dir/reads_1.fastq -2 data_dir/reads_2.fastq -o output_dir -p 8 | 524 perl \${DEFUSE_PATH}/scripts/defuse.pl -c $defuse_config -1 data_dir/reads_1.fastq -2 data_dir/reads_2.fastq -o output_dir -p 8 |
| 603 ## copy primary results to output datasets | 525 ## copy primary results to output datasets |
| 604 if [ -e output_dir/log/defuse.log ]; then cp output_dir/log/defuse.log $defuse_log; fi | 526 if [ -e output_dir/log/defuse.log ]; then cp output_dir/log/defuse.log $defuse_log; fi |
| 605 if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi | 527 ## if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi |
| 606 if [ -e output_dir/results.filtered.tsv ]; then cp output_dir/results.filtered.tsv $results_filtered_tsv; fi | 528 if [ -e output_dir/results.filtered.tsv ]; then cp output_dir/results.filtered.tsv $results_filtered_tsv; fi |
| 607 if [ -e output_dir/results.classify.tsv ]; then cp output_dir/results.classify.tsv $results_classify_tsv; fi | 529 if [ -e output_dir/results.classify.tsv ]; then cp output_dir/results.classify.tsv $results_classify_tsv; fi |
| 608 ## create html with links for output_dir | 530 ## create html with links for output_dir |
| 609 #if $defuse_out.__str__ != 'None': | 531 #if $defuse_out.__str__ != 'None': |
| 610 if [ -e $defuse_out ] | 532 if [ -e $defuse_out ] |
| 648 | 570 |
| 649 DeFuse requires 2 fastq files for paired reads, one with the left mate of the paired reads, and a second fastq with the right mate of the paired reads (**with reads in the same order as in the first fastq dataset**). | 571 DeFuse requires 2 fastq files for paired reads, one with the left mate of the paired reads, and a second fastq with the right mate of the paired reads (**with reads in the same order as in the first fastq dataset**). |
| 650 | 572 |
| 651 If your fastq files have reads in different orders or include unpaired reads, you can preprocess them with **FASTQ interlacer** to create a single interlaced fastq dataset with only the paired reads and input that to **FASTQ de-interlacer** to separate the reads into a left fastq and right fastq. | 573 If your fastq files have reads in different orders or include unpaired reads, you can preprocess them with **FASTQ interlacer** to create a single interlaced fastq dataset with only the paired reads and input that to **FASTQ de-interlacer** to separate the reads into a left fastq and right fastq. |
| 652 | 574 |
| 653 DeFuse uses a Reference Dataset to search for gene fusions. The Reference Dataset is generated from the following sources in DeFuse_Version_0.6_: | 575 DeFuse uses a Reference Dataset to search for gene fusions. The Reference Dataset is generated from the following sources in DeFuse_Version_0.4_: |
| 654 - genome_fasta from Ensembl | 576 - genome_fasta from Ensembl |
| 655 - gene_models from Ensembl | 577 - gene_models from Ensembl |
| 656 - repeats_filename from UCSC RepeatMasker rmsk.txt | 578 - repeats_filename from UCSC RepeatMasker rmsk.txt |
| 657 - est_fasta from UCSC | 579 - est_fasta from UCSC |
| 658 - est_alignments from UCSC intronEst.txt | 580 - est_alignments from UCSC intronEst.txt |
| 659 - unigene_fasta from NCBI | 581 - unigene_fasta from NCBI |
| 660 | 582 |
| 661 .. _DeFuse_Version_0.6: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.6.1 | 583 .. _DeFuse_Version_0.4: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.2 |
| 662 | 584 |
| 663 ------ | 585 ------ |
| 664 | 586 |
| 665 **Outputs** | 587 **Outputs** |
| 666 | 588 |
