changeset 19:1af6f32ff592

Add datamanager, move to defuse_reference.loc
author Jim Johnson <jj@umn.edu>
date Fri, 21 Jun 2013 14:46:11 -0500
parents 547d8db4673e
children b649c729be4c
files create_reference_dataset.xml data_manager_conf.xml datamanager_create_reference.py datamanager_create_reference.xml datatypes_conf.xml defuse.xml
diffstat 6 files changed, 591 insertions(+), 271 deletions(-) [+]
line wrap: on
line diff
--- a/create_reference_dataset.xml	Sat Jun 15 14:36:47 2013 -0500
+++ b/create_reference_dataset.xml	Fri Jun 21 14:46:11 2013 -0500
@@ -7,7 +7,7 @@
   <requirement type="package" version="2013-05-09">gmap</requirement>
   <requirement type="package" version="latest">kent</requirement>
  </requirements>
-  <command interpreter="command"> /bin/bash $shscript </command>
+  <command interpreter="command"> /bin/bash $defuse_script </command>
  <inputs>
   <conditional name="genome">
     <param name="choice" type="select" label="Select a Genome Build">
@@ -112,7 +112,7 @@
   </conditional>
  </inputs>
  <outputs>
-  <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/>
+  <data format="defuse.conf" name="config_txt" label="${tool.name} on ${genome.ensembl_genome_version} : config.txt"/>
  </outputs>
   <stdio>
     <exit_code range="1:"  level="fatal"   description="Error running Create DeFuse Reference" />
@@ -124,18 +124,17 @@
   </stdio>
  <configfiles>
   <configfile name="defuse_config">
-#import ast
 #
 # Configuration file for defuse
 #
-# At a minimum, change all values enclused by []
+# Variables that designate the PATH to an application, e.g. __SAMTOOLS_BIN__
+#   will be set by the runtime script using the ENV PATH
 #
 
 # Directory where the defuse code was unpacked
-## Default location in the tool/defuse directory  
-# source_directory = ${__root_dir__}/tools/defuse
 source_directory = __DEFUSE_PATH__
 
+# Organism IDs
 ensembl_organism = $genome.ensembl_organism
 ensembl_prefix = $genome.ensembl_prefix
 ensembl_version = $genome.ensembl_version
@@ -210,13 +209,6 @@
 data_directory       = $(source_directory)/data
 #end raw
 
-#raw
-# Bowtie parameters
-bowtie_threads                              = 1
-bowtie_quals                                = --phred33-quals
-max_insert_size                             = 500
-#end raw
-
 # Parameters for building the dataset
 chromosomes = $genome.chromosomes
 mt_chromosome = $genome.mt_chromosome
@@ -225,42 +217,12 @@
 rrna_gene_sources = $genome.rrna_gene_sources
 
 #raw
-# Blat sequences per job
-num_blat_sequences                          = 10000
-
-# Minimum gene fusion range
-dna_concordant_length                       = 2000
-
-# Trim length for discordant reads (split reads are not trimmed)
-discord_read_trim                           = 50
-
-# Calculate extra annotations, fusion splice index and interrupted index
-calculate_extra_annotations                 = no
-
-# Filtering parameters
-clustering_precision                        = 0.95
-span_count_threshold                        = 5
-percent_identity_threshold                  = 0.90
-split_min_anchor                            = 4
-splice_bias                                 = 10
-positive_controls                           = $(data_directory)/controls.txt
-probability_threshold                       = 0.50
-
-# Position density when calculating covariance
-covariance_sampling_density                 = 0.01
-
-# Number of reads for each job in split
-reads_per_job                               = 1000000
-
-# If you have command line 'mail' and wish to be notified
-mailto                                      = andrew.mcpherson@gmail.com
-
 # Remove temp files
 remove_job_files                            = yes
 remove_job_temp_files                       = yes
 #end raw
   </configfile>
-  <configfile name="shscript">
+  <configfile name="defuse_script">
 #!/bin/bash
 ## define some things for cheetah proccessing
 #set $amp = chr(38)
@@ -291,7 +253,7 @@
  <help>
 **DeFuse**
 
-DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion.  See the DeFuse_Version_0.6.1_ manual for details.
+DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion.  See the DeFuse_Version_0.6_ manual for details.
 
 DeFuse uses a Reference Dataset to search for gene fusions.  The Reference Dataset is generated from the following sources in DeFuse_Version_0.6_:
     - genome_fasta from Ensembl
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Fri Jun 21 14:46:11 2013 -0500
@@ -0,0 +1,23 @@
+<?xml version="1.0"?>
+<data_managers>
+  <data_manager tool_file="datamanager_create_reference.xml" id="data_manager_defuse_reference" >
+    <data_table name="defuse_reference">  <!-- Defines a Data Table to be modified. -->
+            <output> <!-- Handle the output of the Data Manager Tool -->
+                <column name="unique_id" /> <!-- columns that are going to be specified by the Data Manager Tool -->
+                <column name="dbkey" />
+                <column name="name" />
+                <column name="config" output_ref="out_file" >  <!-- The value of this column will be modified based upon data in "out_file". example value "phiX.fa" -->
+                    <move type="directory"> <!-- Moving a file from the extra files path of "out_file" -->
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">${dbkey}/defuse</target> <!-- Target Location to store the file, directories are created as needed -->
+                    </move>
+                    <!-- datamanager_create_reference.py should have copied the defuse config file to the working directory.  
+                         so if we put the ${dbkey}.config path in this column,  defuse.xml can set the data_directory to this directory.
+                     -->
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/defuse/${dbkey}.config</value_translation> <!-- Store this value in the final Data Table -->
+                </column>
+            </output>
+        </data_table>
+  </data_manager>
+</data_managers>
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datamanager_create_reference.py	Fri Jun 21 14:46:11 2013 -0500
@@ -0,0 +1,118 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import re
+import tempfile
+import subprocess
+import fileinput
+import shutil
+import optparse
+import urllib2
+from ftplib import FTP
+import tarfile
+
+from galaxy.util.json import from_json_string, to_json_string
+
+
+def stop_err(msg):
+    sys.stderr.write(msg)
+    sys.exit(1)
+
+def get_config_dict(config,dataset_directory=None):
+    keys = ['dataset_directory','ensembl_organism','ensembl_prefix','ensembl_version','ensembl_genome_version','ucsc_genome_version','ncbi_organism','ncbi_prefix','chromosomes','mt_chromosome','gene_sources','ig_gene_sources','rrna_gene_sources']
+    pat = '^([^=]+?)\s*=\s*(.*)$'
+    config_dict = {}
+    try:
+        fh = open(config)
+        for i,l in enumerate(fh):
+           line = l.strip() 
+           if line.startswith('#'):
+               continue
+           m = re.match(pat,line)
+           if m and len(m.groups()) == 2:
+               (k,v) = m.groups()
+               if k in keys:
+                   config_dict[k] = v
+    except Exception, e:
+        stop_err( 'Error parsing %s %s\n' % (config,str( e )) )
+    else:
+        fh.close()
+    if dataset_directory:
+        config_dict['dataset_directory'] = dataset_directory
+    return config_dict
+
+def run_defuse_script(data_manager_dict, params, target_directory, dbkey, description, config, script):
+    if not os.path.isdir(target_directory):
+        os.makedirs(target_directory)
+    ## Name the config consistently with data_manager_conf.xml
+    #  copy the config file to the target_directory
+    #  when DataManager moves files to there tool-data location, the config will get moved as well,
+    #   and the value_translation in data_manager_conf.xml will tell us the new location
+    #  defuse.xml will use the path to this config file to set the dataset_directory
+    config_name = '%s.config' % dbkey
+    defuse_config = os.path.join( target_directory, config_name)
+    shutil.copyfile(config,defuse_config) 
+    cmd = "/bin/bash %s %s" % (script,target_directory)
+    # Run
+    try:
+        tmp_out = tempfile.NamedTemporaryFile().name
+        tmp_stdout = open( tmp_out, 'wb' )
+        tmp_err = tempfile.NamedTemporaryFile().name
+        tmp_stderr = open( tmp_err, 'wb' )
+        proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr )
+        returncode = proc.wait()
+        tmp_stderr.close()
+        # get stderr, allowing for case where it's very large
+        tmp_stderr = open( tmp_err, 'rb' )
+        stderr = ''
+        buffsize = 1048576
+        try:
+            while True:
+                stderr += tmp_stderr.read( buffsize )
+                if not stderr or len( stderr ) % buffsize != 0:
+                    break
+        except OverflowError:
+            pass
+        tmp_stdout.close()
+        tmp_stderr.close()
+        if returncode != 0:
+            raise Exception, stderr
+
+        # TODO: look for errors in program output.
+    except Exception, e:
+        stop_err( 'Error creating defuse reference:\n' + str( e ) )
+    config_dict = get_config_dict(config, dataset_directory=target_directory)
+    data_table_entry = dict(unique_id=dbkey, dbkey=dbkey, name=description, path=config_name)
+    _add_data_table_entry( data_manager_dict, data_table_entry )
+def _add_data_table_entry( data_manager_dict, data_table_entry ):
+    data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} )
+    data_manager_dict['data_tables']['defuse'] = data_manager_dict['data_tables'].get( 'defuse', [] )
+    data_manager_dict['data_tables']['defuse'].append( data_table_entry )
+    return data_manager_dict
+
+def main():
+    #Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option( '-k', '--dbkey', dest='dbkey', action='store', type="string", default=None, help='dbkey' )
+    parser.add_option( '-d', '--description', dest='description', action='store', type="string", default=None, help='description' )
+    parser.add_option( '-c', '--defuse_config', dest='defuse_config', action='store', type="string", default=None, help='defuse_config' )
+    parser.add_option( '-s', '--defuse_script', dest='defuse_script', action='store', type="string", default=None, help='defuse_script' )
+    (options, args) = parser.parse_args()
+
+    filename = args[0]
+
+    params = from_json_string( open( filename ).read() )
+    target_directory = params[ 'output_data' ][0]['extra_files_path']
+    os.mkdir( target_directory )
+    data_manager_dict = {}
+
+     
+    #Create Defuse Reference Data
+    run_defuse_script( data_manager_dict, params, target_directory, options.dbkey, options.description,options.defuse_config,options.defuse_script)
+
+    #save info to json file
+    open( filename, 'wb' ).write( to_json_string( data_manager_dict ) )
+
+if __name__ == "__main__": main()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datamanager_create_reference.xml	Fri Jun 21 14:46:11 2013 -0500
@@ -0,0 +1,289 @@
+<tool id="data_manager_defuse_reference" name="DeFuse Reference DataManager" version="1.6.1" tool_type="manage_data">
+ <description>create a defuse reference from Ensembl and UCSC sources</description>
+ <requirements>
+  <requirement type="package" version="0.6.1">defuse</requirement>
+  <requirement type="package" version="0.1.18">samtools</requirement>
+  <requirement type="package" version="1.0.0">bowtie</requirement>
+  <requirement type="package" version="2013-05-09">gmap</requirement>
+  <requirement type="package" version="latest">kent</requirement>
+ </requirements>
+ <command interpreter="python"> datamanager_create_reference.py 
+    --dbkey $genome.ensembl_genome_version 
+    --description "$genome.ensembl_prefix $genome.ensembl_genome_version ($genome.ucsc_genome_version)"
+    --defuse_config $defuse_config
+    --defuse_script $defuse_script
+    $out_file
+ </command>
+ <inputs>
+  <conditional name="genome">
+    <param name="choice" type="select" label="Select a Genome Build">
+      <option value="GRCh37">Homo_sapiens GRCh37  hg19</option>
+      <option value="NCBI36">Homo_sapiens NCBI36 hg18</option>
+      <option value="GRCm38">Mus_musculus GRCm38 mm10</option>
+      <option value="NCBIM37">Mus_musculus NCBIM37 mm9</option>
+      <option value="Rnor_5.0">Rattus_norvegicus Rnor_5.0 rn5</option>
+      <option value="user_specified">User specified</option>
+    </param>
+    <when value="GRCh37">
+      <param name="ensembl_organism" type="hidden" value="homo_sapiens"/>
+      <param name="ensembl_prefix" type="hidden" value="Homo_sapiens"/>
+      <param name="ensembl_genome_version" type="hidden" value="GRCh37"/>
+      <param name="ensembl_version" type="hidden" value="71"/>
+      <param name="ncbi_organism" type="hidden" value="Homo_sapiens"/>
+      <param name="ncbi_prefix" type="hidden" value="Hs"/>
+      <param name="ucsc_genome_version" type="hidden" value="hg19"/>
+      <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT"/>
+      <param name="mt_chromosome" type="hidden" value="MT"/>
+      <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
+      <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
+      <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
+    </when>
+    <when value="NCBI36">
+      <param name="ensembl_organism" type="hidden" value="homo_sapiens"/>
+      <param name="ensembl_prefix" type="hidden" value="Homo_sapiens"/>
+      <param name="ensembl_genome_version" type="hidden" value="NCBI36"/>
+      <param name="ensembl_version" type="hidden" value="54"/>
+      <param name="ncbi_organism" type="hidden" value="Homo_sapiens"/>
+      <param name="ncbi_prefix" type="hidden" value="Hs"/>
+      <param name="ucsc_genome_version" type="hidden" value="hg18"/>
+      <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT"/>
+      <param name="mt_chromosome" type="hidden" value="MT"/>
+      <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
+      <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
+      <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
+    </when>
+    <when value="GRCm38">
+      <param name="ensembl_organism" type="hidden" value="mus_musculus"/>
+      <param name="ensembl_prefix" type="hidden" value="Mus_musculus"/>
+      <param name="ensembl_genome_version" type="hidden" value="GRCm38"/>
+      <param name="ensembl_version" type="hidden" value="71"/>
+      <param name="ncbi_organism" type="hidden" value="Mus_musculus"/>
+      <param name="ncbi_prefix" type="hidden" value="Mm"/>
+      <param name="ucsc_genome_version" type="hidden" value="mm10"/>
+      <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT"/>
+      <param name="mt_chromosome" type="hidden" value="MT"/>
+      <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
+      <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
+      <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
+    </when>
+    <when value="NCBIM37">
+      <param name="ensembl_organism" type="hidden" value="mus_musculus"/>
+      <param name="ensembl_prefix" type="hidden" value="Mus_musculus"/>
+      <param name="ensembl_genome_version" type="hidden" value="NCBIM37"/>
+      <param name="ensembl_version" type="hidden" value="67"/>
+      <param name="ncbi_organism" type="hidden" value="Mus_musculus"/>
+      <param name="ncbi_prefix" type="hidden" value="Mm"/>
+      <param name="ucsc_genome_version" type="hidden" value="mm9"/>
+      <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT"/>
+      <param name="mt_chromosome" type="hidden" value="MT"/>
+      <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
+      <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
+      <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
+    </when>
+    <when value="Rnor_5.0">
+      <param name="ensembl_organism" type="hidden" value="rattus_norvegicus"/>
+      <param name="ensembl_prefix" type="hidden" value="Rattus_norvegicus"/>
+      <param name="ensembl_genome_version" type="hidden" value="Rnor_5.0"/>
+      <param name="ensembl_version" type="hidden" value="71"/>
+      <param name="ncbi_organism" type="hidden" value="Rattus_norvegicus"/>
+      <param name="ncbi_prefix" type="hidden" value="Rn"/>
+      <param name="ucsc_genome_version" type="hidden" value="rn5"/>
+      <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,MT"/>
+      <param name="mt_chromosome" type="hidden" value="MT"/>
+      <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
+      <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
+      <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
+    </when>
+    <when value="user_specified">
+      <param name="ensembl_organism" type="text" value="" label="Ensembl Organism Name" help="Examples: homo_sapiens, mus_musculus, rattus_norvegicus"/>
+      <param name="ensembl_prefix" type="text" value="" label="Ensembl Organism prefix" help="Examples: Homo_sapiens, Mus_musculus, Rattus_norvegicus"/>
+      <param name="ensembl_genome_version" type="text" value="" label="Ensembl Genome Version" help="Examples: GRCh37, GRCm38, Rnor_5.0"/>
+      <param name="ensembl_version" type="integer" value="" label="Ensembl Release Version" help="Example: 71"/>
+      <param name="ncbi_organism" type="text" value="" label="NCBI Organism Name" help="Examples: Homo_sapiens, Mus_musculus, Rattus_norvegicus"/>
+      <param name="ncbi_prefix" type="text" value="" label="NCBI Organism Unigene prefix" help="Examples: Hs, Mm, Rn"/>
+      <param name="ucsc_genome_version" type="text" value="" label="UCSC Genome Version" help="Examples: hg19, mm10, rn5"/>
+      <param name="chromosomes" type="text" value="" label="Chromosomes for Ensembl genome build" >
+       <help>  Examples: 
+         Homo_sapiens: 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT
+         Mus_musculus: 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT
+         Rattus_norvegicus: 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,MT
+         ( ftp://ftp.ensembl.org/pub/release-71/fasta/homo_sapiens/dna/ )
+       </help>
+      </param>
+      <param name="mt_chromosome" type="text" value="MT" label="Ensembl Mitochondrial Chromosome name" />
+      <param name="gene_sources" type="text" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding" label="Gene sources" />
+      <param name="ig_gene_sources" type="text" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene" label="IG Gene sources" />
+      <param name="rrna_gene_sources" type="text" value="Mt_rRNA,rRNA,rRNA_pseudogene" label="Ribosomal Gene sources" />
+    </when>
+  </conditional>
+ </inputs>
+ <outputs>
+  <data name="out_file" format="data_manager_json"/>
+ </outputs>
+  <stdio>
+    <exit_code range="1:"  level="fatal"   description="Error running Create DeFuse Reference" />
+    <regex match="Error:" 
+           source="both" 
+           level="fatal" 
+           description="Error running Create DeFuse Reference" />
+
+  </stdio>
+ <configfiles>
+  <configfile name="defuse_config">
+#
+# Configuration file for defuse
+#
+# Variables that designate the PATH to an application, e.g. __SAMTOOLS_BIN__
+#   will be set by the runtime script using the ENV PATH
+#
+
+# Directory where the defuse code was unpacked
+source_directory = __DEFUSE_PATH__
+
+# Organism IDs
+ensembl_organism = $genome.ensembl_organism
+ensembl_prefix = $genome.ensembl_prefix
+ensembl_version = $genome.ensembl_version
+ensembl_genome_version = $genome.ensembl_genome_version
+ucsc_genome_version = $genome.ucsc_genome_version
+ncbi_organism = $genome.ncbi_organism
+ncbi_prefix = $genome.ncbi_prefix
+
+# Directory where you want your dataset
+dataset_directory = __DATASET_DIRECTORY__
+
+#raw
+# Input genome and gene models
+gene_models                                 = $(dataset_directory)/$(ensembl_prefix).$(ensembl_genome_version).$(ensembl_version).gtf
+genome_fasta                                = $(dataset_directory)/$(ensembl_prefix).$(ensembl_genome_version).$(ensembl_version).dna.chromosomes.fa
+
+# Repeat table from ucsc genome browser
+repeats_filename                            = $(dataset_directory)/repeats.txt
+
+# EST info downloaded from ucsc genome browser
+est_fasta                                   = $(dataset_directory)/est.fa
+est_alignments                              = $(dataset_directory)/intronEst.txt
+
+# Unigene clusters downloaded from ncbi
+unigene_fasta                               = $(dataset_directory)/$(ncbi_prefix).seq.uniq
+#end raw
+
+# Paths to external tools
+samtools_bin =  __SAMTOOLS_BIN__
+bowtie_bin = __BOWTIE_BIN__
+bowtie_build_bin = __BOWTIE_BUILD_BIN__
+blat_bin = __BLAT_BIN__
+fatotwobit_bin = __FATOTWOBIT_BIN__
+gmap_bin = __GMAP_BIN__
+gmap_setup_bin = __GMAP_SETUP_BIN__
+r_bin = __R_BIN__
+rscript_bin = __RSCRIPT_BIN__
+
+#raw
+# Directory where you want your dataset
+gmap_index_directory                        = $(dataset_directory)/gmap
+#end raw
+
+#raw
+# Dataset files
+dataset_prefix       = $(dataset_directory)/defuse
+chromosome_prefix    = $(dataset_prefix).dna.chromosomes
+exons_fasta          = $(dataset_prefix).exons.fa
+cds_fasta            = $(dataset_prefix).cds.fa
+cdna_regions         = $(dataset_prefix).cdna.regions
+cdna_fasta           = $(dataset_prefix).cdna.fa
+reference_fasta      = $(dataset_prefix).reference.fa
+rrna_fasta           = $(dataset_prefix).rrna.fa
+ig_gene_list         = $(dataset_prefix).ig.gene.list
+repeats_regions      = $(dataset_directory)/repeats.regions
+est_split_fasta1     = $(dataset_directory)/est.1.fa
+est_split_fasta2     = $(dataset_directory)/est.2.fa
+est_split_fasta3     = $(dataset_directory)/est.3.fa
+est_split_fasta4     = $(dataset_directory)/est.4.fa
+est_split_fasta5     = $(dataset_directory)/est.5.fa
+est_split_fasta6     = $(dataset_directory)/est.6.fa
+est_split_fasta7     = $(dataset_directory)/est.7.fa
+est_split_fasta8     = $(dataset_directory)/est.8.fa
+est_split_fasta9     = $(dataset_directory)/est.9.fa
+
+# Fasta files with bowtie indices for prefiltering reads for concordantly mapping pairs
+prefilter1           = $(unigene_fasta)
+
+# deFuse scripts and tools
+scripts_directory    = $(source_directory)/scripts
+tools_directory      = $(source_directory)/tools
+data_directory       = $(source_directory)/data
+#end raw
+
+# Parameters for building the dataset
+chromosomes = $genome.chromosomes
+mt_chromosome = $genome.mt_chromosome
+gene_sources = $genome.gene_sources
+ig_gene_sources = $genome.ig_gene_sources
+rrna_gene_sources = $genome.rrna_gene_sources
+
+#raw
+# Remove temp files
+remove_job_files                            = yes
+remove_job_temp_files                       = yes
+#end raw
+  </configfile>
+  <configfile name="defuse_script">
+#!/bin/bash
+## define some things for cheetah processing
+#set $amp = chr(38)
+#set $gt = chr(62)
+## substitute pathnames into config file
+if `grep __DATASET_DIRECTORY__ $defuse_config ${gt} /dev/null`;then sed -i'.tmp' "s#__DATASET_DIRECTORY__#\$1#" $defuse_config; fi
+if `grep __DEFUSE_PATH__ $defuse_config ${gt} /dev/null`;then sed -i'.tmp' "s#__DEFUSE_PATH__#\${DEFUSE_PATH}#" $defuse_config; fi
+if `grep __SAMTOOLS_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} SAMTOOLS_BIN=`which samtools`;then sed -i'.tmp' "s#__SAMTOOLS_BIN__#\${SAMTOOLS_BIN}#" $defuse_config; fi
+if `grep __BOWTIE_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BIN=`which bowtie`;then sed -i'.tmp' "s#__BOWTIE_BIN__#\${BOWTIE_BIN}#" $defuse_config; fi
+if `grep __BOWTIE_BUILD_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BUILD_BIN=`which bowtie-build`;then sed -i'.tmp' "s#__BOWTIE_BUILD_BIN__#\${BOWTIE_BUILD_BIN}#" $defuse_config; fi
+if `grep __BLAT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BLAT_BIN=`which blat`;then sed -i'.tmp' "s#__BLAT_BIN__#\${BLAT_BIN}#" $defuse_config; fi
+if `grep __FATOTWOBIT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} FATOTWOBIT_BIN=`which faToTwoBit`;then sed -i'.tmp' "s#__FATOTWOBIT_BIN__#\${FATOTWOBIT_BIN}#" $defuse_config; fi
+if `grep __GMAP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_BIN=`which gmap`;then sed -i'.tmp' "s#__GMAP_BIN__#\${GMAP_BIN}#" $defuse_config; fi
+if `grep __GMAP_SETUP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_SETUP_BIN=`which gmap_setup`;then sed -i'.tmp' "s#__GMAP_SETUP_BIN__#\${GMAP_SETUP_BIN}#" $defuse_config; fi
+if `grep __GMAP_INDEX_DIR__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_INDEX_DIR=`pwd`/gmap;then sed -i'.tmp' "s#__GMAP_INDEX_DIR__#\${GMAP_INDEX_DIR}#" $defuse_config; fi
+if `grep __R_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} R_BIN=`which R`;then sed -i'.tmp' "s#__R_BIN__#\${R_BIN}#" $defuse_config; fi
+if `grep __RSCRIPT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} RSCRIPT_BIN=`which Rscript`;then sed -i'.tmp' "s#__RSCRIPT_BIN__#\${RSCRIPT_BIN}#" $defuse_config; fi
+## copy config to output
+cp $defuse_config \$1/defuse_config.txt
+## Run the create_reference_dataset.pl
+perl \${DEFUSE_PATH}/scripts/create_reference_dataset.pl -c $defuse_config 
+  </configfile>
+ </configfiles>
+
+ <tests>
+ </tests>
+ <help>
+**DeFuse**
+
+DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion.  See the DeFuse_Version_0.6_ manual for details.
+
+DeFuse uses a Reference Dataset to search for gene fusions.  The Reference Dataset is generated from the following sources in DeFuse_Version_0.6_:
+    - genome_fasta from Ensembl
+    - gene_models from Ensembl
+    - repeats_filename from UCSC RepeatMasker rmsk.txt
+    - est_fasta from UCSC
+    - est_alignments from UCSC intronEst.txt
+    - unigene_fasta from NCBI
+
+The create_defuse_reference Galaxy tool downloads the reference genome and other source files, and builds any derivative files including bowtie indices, gmap indices, and 2bit files. Expect this step to take at least 12 hours.
+
+
+It will generate the reference data for the deFuse Galaxy tool.
+
+Journal reference: http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1001138
+
+.. _DeFuse: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page
+
+.. _DeFuse_Version_0.6: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.6.1
+
+------
+
+**Outputs**
+
+The galaxy history will contain: the config.txt file that provides DeFuse with the reference data paths.  
+
+ </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml	Fri Jun 21 14:46:11 2013 -0500
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<datatypes>
+    <registration>
+        <datatype extension="defuse.conf" type="galaxy.datatypes.data:Text" subclass="True"/>
+    </registration>
+</datatypes>
--- a/defuse.xml	Sat Jun 15 14:36:47 2013 -0500
+++ b/defuse.xml	Fri Jun 21 14:46:11 2013 -0500
@@ -12,66 +12,66 @@
   <param name="left_pairendreads" type="data" format="fastq" label="left part of read pairs" help="The left and right reads pairs must be in the same order, and not have any unpaired reads.  (FASTQ interlacer will pair reads and remove the unpaired.   FASTQ de-interlacer will separate the result into left and right reads.)"/>
   <param name="right_pairendreads" type="data" format="fastq" label="right part of read pairs" help="In the same order as the left reads"/>
   <conditional name="refGenomeSource">
-      <param name="genomeSource" type="select" label="Will you select a built-in DeFuse Reference Dataset, or supply a configuration from your history" help="">
-        <option value="indexed">Use a built-in DeFuse Reference Dataset</option>
-        <option value="history">Use a configuration from your history that specifies the DeFuse Reference Dataset</option>
+    <param name="genomeSource" type="select" label="Will you select a built-in DeFuse Reference Dataset, or supply a configuration from your history" help="">
+      <option value="indexed">Use a built-in DeFuse Reference Dataset</option>
+      <option value="history">Use a configuration from your history that specifies the DeFuse Reference Dataset</option>
+    </param>
+    <when value="indexed">
+      <param name="index" type="select" label="Select a Reference Dataset" help="if your genome of interest is not listed - contact Galaxy team">
+        <options from_file="defuse_reference.loc">
+          <column name="name" index="1"/>
+          <column name="value" index="2"/>
+          <filter type="sort_by" column="0" />
+          <validator type="no_options" message="No indexes are available" />
+        </options>
       </param>
-      <when value="indexed">
-        <param name="index" type="select" label="Select a Reference Dataset" help="if your genome of interest is not listed - contact Galaxy team">
-          <options from_file="defuse.loc">
-            <column name="name" index="1"/>
-            <column name="value" index="2"/>
-            <filter type="sort_by" column="0" />
-            <validator type="no_options" message="No indexes are available" />
-          </options>
-        </param>
-        <conditional name="defuse_param">
-          <param name="settings" type="select" label="Defuse parameter settings" help="">
-            <option value="preSet">Default settings</option>
-            <option value="full">Full parameter list</option>
-          </param>
-          <when value="preSet" />
-          <when value="full">
-            <param name="max_insert_size" type="integer" value="500" optional="true" label="Bowtie max_insert_size" />
-            <param name="dna_concordant_length" type="integer" value="2000" optional="true" label="Minimum gene fusion range dna_concordant_length" />
-            <param name="discord_read_trim" type="integer" value="50" optional="true" label="Trim length for discordant reads discord_read_trim" help="(split reads are not trimmed)" />
-            <param name="clustering_precision" type="float" value=".95" optional="true" label="Filter clustering_precision">
-              <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/>
-            </param>
-            <param name="span_count_threshold" type="integer" value="5" optional="true" label="Filter span_count_threshold" />
-            <param name="split_count_threshold" type="integer" value="3" optional="true" label="Filter split_count_threshold" />
-            <param name="percent_identity_threshold" type="float" value=".90" optional="true" label="Filter percent_identity_threshold">
-              <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/>
-            </param>
-            <param name="max_dist_pos" type="integer" value="600" optional="true" label="Filter max_dist_pos" />
-            <param name="num_dist_genes" type="integer" value="500" optional="true" label="Filter num_dist_genes" />
-            <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" />
-            <param name="max_concordant_ratio" type="float" value="0.1" optional="true" label="Filter max_concordant_ratio">
-              <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
-            </param>
-            <param name="splice_bias" type="integer" value="10" optional="true" label="Filter splice_bias" />
-            <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold">
-              <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
-            </param>
-            <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density">
-              <help>Position density when calculating covariance</help>
-              <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
-            </param>
-            <param name="denovo_assembly" type="select" label="denovo_assembly" help="">
-              <option value="">Use Default</option>
-              <option value="no">no</option>
-              <option value="yes">yes</option>
-            </param>
-            <!--
-              <param name="positive_controls" type="data" format="txt" optional=true label="Defuse positive_controls" help=""/>
-            -->
-          </when> <!-- full -->
-        </conditional>  <!-- defuse_param -->
-      </when>
-      <when value="history">
-        <param name="config" type="data" format="txt" label="Defuse Config file" help=""/>
-      </when>  <!-- history -->
+    </when>
+    <when value="history">
+      <param name="config" type="data" format="defuse.conf" label="Defuse Config file" help=""/>
+    </when>  <!-- history -->
   </conditional>  <!-- refGenomeSource -->
+  <conditional name="defuse_param">
+    <param name="settings" type="select" label="Defuse parameter settings" help="">
+      <option value="preSet">Default settings</option>
+      <option value="full">Full parameter list</option>
+    </param>
+    <when value="preSet" />
+    <when value="full">
+      <param name="max_insert_size" type="integer" value="500" optional="true" label="Bowtie max_insert_size" />
+      <param name="dna_concordant_length" type="integer" value="2000" optional="true" label="Minimum gene fusion range dna_concordant_length" />
+      <param name="discord_read_trim" type="integer" value="50" optional="true" label="Trim length for discordant reads discord_read_trim" help="(split reads are not trimmed)" />
+      <param name="calculate_extra_annotations" type="select" label="Calculate extra annotations, fusion splice index and interrupted index" help="">
+        <option value="">Use Default</option>
+        <option value="no">no</option>
+        <option value="yes">yes</option>
+      </param>
+      <param name="clustering_precision" type="float" value=".95" optional="true" label="Filter clustering_precision">
+        <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/>
+      </param>
+      <param name="span_count_threshold" type="integer" value="5" optional="true" label="Filter span_count_threshold" />
+      <param name="percent_identity_threshold" type="float" value=".90" optional="true" label="Filter percent_identity_threshold">
+        <validator type="in_range" message="Choose a value between .1 and 1.0" min=".1" max="1"/>
+      </param>
+      <param name="split_min_anchor" type="integer" value="4" optional="true" label="Filter split_min_anchor" />
+      <param name="splice_bias" type="integer" value="10" optional="true" label="Filter splice_bias" />
+      <param name="probability_threshold" type="float" value="0.50" optional="true" label="Filter probability_threshold">
+        <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
+      </param>
+      <param name="covariance_sampling_density" type="float" value="0.01" optional="true" label="covariance_sampling_density">
+        <help>Position density when calculating covariance</help>
+        <validator type="in_range" message="Choose a value between 0.0 and 1.0" min="0" max="1"/>
+      </param>
+      <param name="denovo_assembly" type="select" label="denovo_assembly" help="">
+        <option value="">Use Default</option>
+        <option value="no">no</option>
+        <option value="yes">yes</option>
+      </param>
+      <!--
+        <param name="positive_controls" type="data" format="txt" optional=true label="Defuse positive_controls" help=""/>
+      -->
+      <param name="reads_per_job" type="integer" value="1000000" optional="true" label="Number of reads for each job in split" />
+    </when> <!-- full -->
+  </conditional>  <!-- defuse_param -->
   <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files" 
          help="The defuse output working directory can be helpful for determining errors that may have occurred during the run, 
                but they require considerable diskspace, and should be deleted and purged when no longer needed."/>
@@ -83,20 +83,50 @@
   <data format="html" name="defuse_out" label="${tool.name} on ${on_string}: defuse_output (purge when no longer needed)">
     <filter>keep_output == True</filter>
   </data>
-  <data format="tabular" name="results_tsv" label="${tool.name} on ${on_string}: results.tsv" />
   <data format="tabular" name="results_classify_tsv" label="${tool.name} on ${on_string}: results.classify.tsv" />
   <data format="tabular" name="results_filtered_tsv" label="${tool.name} on ${on_string}: results.filtered.tsv" />
   <data format="html" name="fusion_reads" label="${tool.name} on ${on_string}: fusion_reads">
     <filter>do_get_reads == True</filter>
   </data>
+  <!--
+   expression_plot
+   circos plot
+  -->
  </outputs>
  <configfiles>
   <configfile name="defuse_config">
-#import ast
+#import re
+#set $ds = chr(36)
 #if $refGenomeSource.genomeSource == "history":
-#include raw $refGenomeSource.config.__str__
+#set config_file = $refGenomeSource.config.__str__
+## history: the selected history dataset is itself the deFuse config file
 #else 
-#set $ref_dict = dict($ast.literal_eval($refGenomeSource.index.value))
+#set config_file = $refGenomeSource.index.value
+#end if
+#set pat = '^\s*([^#=][^=]*?)\s*=\s*(.*?)\s*$'
+#set fh = open($config_file)
+#set keys = ['dataset_directory','ensembl_organism','ensembl_prefix','ensembl_version','ensembl_genome_version','ucsc_genome_version','ncbi_organism','ncbi_prefix','chromosomes','mt_chromosome','gene_sources','ig_gene_sources','rrna_gene_sources']
+#set kv = []
+#for $line in $fh:
+  #set m = $re.match($pat,$line)
+  #if $m and len($m.groups()) == 2:
+    ## only "key = value" lines whose key is listed in keys are kept
+    #if $m.groups()[0] in keys:
+      #set k = $m.groups()[0]
+      #if k == 'dataset_directory' and $refGenomeSource.genomeSource == "indexed":
+        ## The DataManager is configured to place the config file in the same directory as the defuse_data: dataset_directory
+        #set v = $os.path.dirname($config_file)
+      #else:
+        #set v = $m.groups()[1]
+      #end if
+      #set kv = $kv + [[$k, $v]]
+    #end if
+  #end if
+#end for
+#silent $fh.close()
+#set ref_dict = dict($kv)
+## ref_dict maps each retained reference key to the value read from config_file
+## include raw $refGenomeSource.config.__str__
 #
 # Configuration file for defuse
 #
@@ -106,12 +136,7 @@
 # Directory where the defuse code was unpacked
 ## Default location in the tool/defuse directory  
 # source_directory = ${__root_dir__}/tools/defuse
-source_directory = #slurp
-#try
-$ref_dict['source_directory']
-#except
-__DEFUSE_PATH__
-#end try
+source_directory = __DEFUSE_PATH__
 
 # Directory where you want your dataset
 dataset_directory = #slurp
@@ -166,60 +191,15 @@
 #end try
 
 # Paths to external tools
-bowtie_bin = #slurp
-#try
-$ref_dict['bowtie_bin']
-#except
-__BOWTIE_BIN__
-#end try
-bowtie_build_bin = #slurp
-#try
-$ref_dict['bowtie_build_bin']
-#except
-__BOWTIE_BUILD_BIN__
-#end try
-blat_bin = #slurp
-#try
-$ref_dict['blat_bin']
-#except
-__BLAT_BIN__
-#end try
-fatotwobit_bin = #slurp
-#try
-$ref_dict['fatotwobit_bin']
-#except
-__FATOTWOBIT_BIN__
-#end try
-gmap_bin = #slurp
-#try
-$ref_dict['gmap_bin']
-#except
-__GMAP_BIN__
-#end try
-gmap_bin = #slurp
-#try
-$ref_dict['gmap_bin']
-#except
-__GMAP_BIN__
-#end try
-gmap_setup_bin = #slurp
-#try
-$ref_dict['gmap_setup_bin']
-#except
-__GMAP_SETUP_BIN__
-#end try
-r_bin = #slurp
-#try
-$ref_dict['r_bin']
-#except
-__R_BIN__
-#end try
-rscript_bin = #slurp
-#try
-$ref_dict['rscript_bin']
-#except
-__RSCRIPT_BIN__
-#end try
+bowtie_bin = __BOWTIE_BIN__
+bowtie_build_bin = __BOWTIE_BUILD_BIN__
+blat_bin = __BLAT_BIN__
+fatotwobit_bin = __FATOTWOBIT_BIN__
+gmap_bin = __GMAP_BIN__
+gmap_bin = __GMAP_BIN__
+gmap_setup_bin = __GMAP_SETUP_BIN__
+r_bin = __R_BIN__
+rscript_bin = __RSCRIPT_BIN__
 
 # Directory where you want your dataset
 gmap_index_directory = #slurp
@@ -284,8 +264,8 @@
 --phred33-quals
 #end try
 max_insert_size = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_insert_size.__str__ != "":
-$refGenomeSource.defuse_param.max_insert_size
+#if $defuse_param.settings == "full" and $defuse_param.max_insert_size.__str__ != "":
+$defuse_param.max_insert_size
 #else
 #try
 $ref_dict['max_insert_size']
@@ -336,8 +316,8 @@
 
 # Minimum gene fusion range
 dna_concordant_length = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.dna_concordant_length.__str__ != "":
-$refGenomeSource.defuse_param.dna_concordant_length
+#if $defuse_param.settings == "full" and $defuse_param.dna_concordant_length.__str__ != "":
+$defuse_param.dna_concordant_length
 #else
 #try
 $ref_dict['dna_concordant_length']
@@ -348,8 +328,8 @@
 
 # Trim length for discordant reads (split reads are not trimmed)
 discord_read_trim = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.discord_read_trim.__str__ != "":
-$refGenomeSource.defuse_param.discord_read_trim
+#if $defuse_param.settings == "full" and $defuse_param.discord_read_trim.__str__ != "":
+$defuse_param.discord_read_trim
 #else
 #try
 $ref_dict['discord_read_trim']
@@ -357,11 +337,21 @@
 50
 #end try
 #end if
-
+# Calculate extra annotations, fusion splice index and interrupted index
+calculate_extra_annotations = #slurp
+#if $defuse_param.settings == "full" and $defuse_param.calculate_extra_annotations.__str__ != "":
+$defuse_param.calculate_extra_annotations
+#else
+#try
+$ref_dict['calculate_extra_annotations']
+#except
+no
+#end try
+#end if
 # Filtering parameters
 clustering_precision = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.clustering_precision.__str__ != ""
-$refGenomeSource.defuse_param.clustering_precision
+#if $defuse_param.settings == "full" and $defuse_param.clustering_precision.__str__ != ""
+$defuse_param.clustering_precision
 #else
 #try
 $ref_dict['clustering_precision']
@@ -370,8 +360,8 @@
 #end try
 #end if
 span_count_threshold = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.span_count_threshold.__str__ != ""
-$refGenomeSource.defuse_param.span_count_threshold
+#if $defuse_param.settings == "full" and $defuse_param.span_count_threshold.__str__ != ""
+$defuse_param.span_count_threshold
 #else
 #try
 $ref_dict['span_count_threshold']
@@ -379,19 +369,9 @@
 5
 #end try
 #end if
-split_count_threshold = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.split_count_threshold.__str__ != ""
-$refGenomeSource.defuse_param.split_count_threshold
-#else
-#try
-$ref_dict['split_count_threshold']
-#except
-3
-#end try
-#end if
 percent_identity_threshold = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.percent_identity_threshold.__str__ != ""
-$refGenomeSource.defuse_param.percent_identity_threshold
+#if $defuse_param.settings == "full" and $defuse_param.percent_identity_threshold.__str__ != ""
+$defuse_param.percent_identity_threshold
 #else
 #try
 $ref_dict['percent_identity_threshold']
@@ -399,29 +379,9 @@
 0.90
 #end try
 #end if
-max_dist_pos = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_dist_pos.__str__ != ""
-$refGenomeSource.defuse_param.max_dist_pos
-#else
-#try
-$ref_dict['max_dist_pos']
-#except
-600
-#end try
-#end if
-num_dist_genes = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.num_dist_genes.__str__ != ""
-$refGenomeSource.defuse_param.num_dist_genes
-#else
-#try
-$ref_dict['num_dist_genes']
-#except
-500
-#end try
-#end if
 split_min_anchor = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.split_min_anchor.__str__ != ""
-$refGenomeSource.defuse_param.split_min_anchor
+#if $defuse_param.settings == "full" and $defuse_param.split_min_anchor.__str__ != ""
+$defuse_param.split_min_anchor
 #else
 #try
 $ref_dict['split_min_anchor']
@@ -429,19 +389,9 @@
 4
 #end try
 #end if
-max_concordant_ratio = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.max_concordant_ratio.__str__ != ""
-$refGenomeSource.defuse_param.max_concordant_ratio
-#else
-#try
-$ref_dict['max_concordant_ratio']
-#except
-0.1
-#end try
-#end if
 splice_bias = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.splice_bias.__str__ != ""
-$refGenomeSource.defuse_param.splice_bias
+#if $defuse_param.settings == "full" and $defuse_param.splice_bias.__str__ != ""
+$defuse_param.splice_bias
 #else
 #try
 $ref_dict['splice_bias']
@@ -450,8 +400,8 @@
 #end try
 #end if
 denovo_assembly = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.denovo_assembly.__str__ != ""
-$refGenomeSource.defuse_param.denovo_assembly
+#if $defuse_param.settings == "full" and $defuse_param.denovo_assembly.__str__ != ""
+$defuse_param.denovo_assembly
 #else
 #try
 $ref_dict['denovo_assembly']
@@ -460,8 +410,8 @@
 #end try
 #end if
 probability_threshold = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.probability_threshold.__str__ != ""
-$refGenomeSource.defuse_param.probability_threshold
+#if $defuse_param.settings == "full" and $defuse_param.probability_threshold.__str__ != ""
+$defuse_param.probability_threshold
 #else
 #try
 $ref_dict['probability_threshold']
@@ -473,8 +423,8 @@
 
 # Position density when calculating covariance
 covariance_sampling_density = #slurp
-#if $refGenomeSource.defuse_param.settings == "full" and $refGenomeSource.defuse_param.covariance_sampling_density.__str__ != ""
-$refGenomeSource.defuse_param.covariance_sampling_density
+#if $defuse_param.settings == "full" and $defuse_param.covariance_sampling_density.__str__ != ""
+$defuse_param.covariance_sampling_density
 #else
 #try
 $ref_dict['covariance_sampling_density']
@@ -482,13 +432,17 @@
 0.01
 #end try
 #end if
-
-
 # Number of reads for each job in split
-reads_per_job                               = 1000000
-
-# Number of regions for each breakpoint sequence job in split
-regions_per_job                             = 20
+reads_per_job = #slurp
+#if $defuse_param.settings == "full" and $defuse_param.reads_per_job.__str__ != ""
+$defuse_param.reads_per_job
+#else
+#try
+$ref_dict['reads_per_job']
+#except
+1000000
+#end try
+#end if
 
 #raw
 # If you have command line 'mail' and wish to be notified
@@ -498,40 +452,8 @@
 remove_job_files                            = yes
 remove_job_temp_files                       = yes
 
-# Converting to fastq
-# Fastq converter config format 1 for reads stored in separate files for each end
-#  data_lane_rexex_N is a perl regex which stores the lane id in $1
-#  data_end_regex_N is a perl regex which stores the end, 1 or 2, in $1
-#  data_compress_regex_N is a perl regex which stores the compression extension in $1
-#  data_convert_N is the associated conversion utility that takes data at stdin and outputs fastq at stdout
-# Fastq converter config format 2 for reads stored in separate files for each end
-#  data_lane_regex_N is a perl regex which stores the lane id in $1
-#  data_compress_regex_N is a perl regex which stores the compression extension in $1
-#  data_end1_converter_N is the associated conversion utility that takes data at stdin and outputs fastq for end 1 at stdout
-#  data_end2_converter_N is the associated conversion utility that takes data at stdin and outputs fastq for end 2 at stdout
-
-data_lane_regex_1                           = ^(.+)_[12]_export\.txt.*$
-data_end_regex_1                            = ^.+_([12])_export\.txt.*$
-data_compress_regex_1                       = ^.+_[12]_export\.txt(.*)$
-data_converter_1                            = $(scripts_directory)/fq_all2std.pl export2std
-
-data_lane_regex_2                           = ^(.+)_[12]_concat_qseq\.txt.*$
-data_end_regex_2                            = ^.+_([12])_concat_qseq\.txt.*$
-data_compress_regex_2                       = ^.+_[12]_concat_qseq\.txt(.*)$
-data_converter_2                            = $(scripts_directory)/qseq2fastq.pl
-
-data_lane_regex_3                           = ^(.+)\.bam.*$
-data_compress_regex_3                       = ^.+\.bam(.*)$
-data_end1_converter_3                       = samtools view - | filter_sam_mate.pl 1 | sam_to_fastq.pl
-data_end2_converter_3                       = samtools view - | filter_sam_mate.pl 2 | sam_to_fastq.pl
-
-data_lane_regex_4                           = ^(.+).[12].fastq.*$
-data_end_regex_4                            = ^.+.([12]).fastq.*$
-data_compress_regex_4                       = ^.+.[12].fastq(.*)$
-data_converter_4                            = cat
 #end raw
 
-#end if
 
   </configfile>
   <configfile name="shscript">
@@ -602,7 +524,7 @@
 perl \${DEFUSE_PATH}/scripts/defuse.pl -c $defuse_config -1 data_dir/reads_1.fastq -2 data_dir/reads_2.fastq -o output_dir  -p 8
 ## copy primary results to output datasets
 if [ -e output_dir/log/defuse.log ]; then cp output_dir/log/defuse.log $defuse_log; fi
-if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi
+## if [ -e output_dir/results.tsv ]; then cp output_dir/results.tsv $results_tsv; fi
 if [ -e output_dir/results.filtered.tsv ]; then cp output_dir/results.filtered.tsv $results_filtered_tsv; fi
 if [ -e output_dir/results.classify.tsv ]; then cp output_dir/results.classify.tsv $results_classify_tsv; fi
 ## create html with links for output_dir
@@ -650,7 +572,7 @@
 
 If your fastq files have reads in different orders or include unpaired reads,  you can preprocess them with **FASTQ interlacer** to create a single interlaced fastq dataset with only the paired reads and input that to **FASTQ de-interlacer** to separate the reads into a left fastq and right fastq.
 
-DeFuse uses a Reference Dataset to search for gene fusions.  The Reference Dataset is generated from the following sources in DeFuse_Version_0.6_:
+DeFuse uses a Reference Dataset to search for gene fusions.  The Reference Dataset is generated from the following sources in DeFuse_Version_0.4_:
     - genome_fasta from Ensembl 
     - gene_models from Ensembl 
     - repeats_filename from UCSC RepeatMasker rmsk.txt
@@ -658,7 +580,7 @@
     - est_alignments from UCSC intronEst.txt
     - unigene_fasta from NCBI
 
-.. _DeFuse_Version_0.6: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.6.1
+.. _DeFuse_Version_0.4: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.2
 
 ------