diff create_reference_dataset.xml @ 19:1af6f32ff592

Add datamanager, move to defuse_reference.loc
author Jim Johnson <jj@umn.edu>
date Fri, 21 Jun 2013 14:46:11 -0500
parents 547d8db4673e
children 3099cec648e7
line wrap: on
line diff
--- a/create_reference_dataset.xml	Sat Jun 15 14:36:47 2013 -0500
+++ b/create_reference_dataset.xml	Fri Jun 21 14:46:11 2013 -0500
@@ -7,7 +7,7 @@
   <requirement type="package" version="2013-05-09">gmap</requirement>
   <requirement type="package" version="latest">kent</requirement>
  </requirements>
-  <command interpreter="command"> /bin/bash $shscript </command>
+  <command interpreter="command"> /bin/bash $defuse_script </command>
  <inputs>
   <conditional name="genome">
     <param name="choice" type="select" label="Select a Genome Build">
@@ -112,7 +112,7 @@
   </conditional>
  </inputs>
  <outputs>
-  <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/>
+  <data format="defuse.conf" name="config_txt" label="${tool.name} on ${genome.ensembl_genome_version} : config.txt"/>
  </outputs>
   <stdio>
     <exit_code range="1:"  level="fatal"   description="Error running Create DeFuse Reference" />
@@ -124,18 +124,17 @@
   </stdio>
  <configfiles>
   <configfile name="defuse_config">
-#import ast
 #
 # Configuration file for defuse
 #
-# At a minimum, change all values enclused by []
+# Variables that designate the PATH to an application, e.g. __SAMTOOLS_BIN__
+#   will be set by the runtime script using the ENV PATH
 #
 
 # Directory where the defuse code was unpacked
-## Default location in the tool/defuse directory  
-# source_directory = ${__root_dir__}/tools/defuse
 source_directory = __DEFUSE_PATH__
 
+# Organism IDs
 ensembl_organism = $genome.ensembl_organism
 ensembl_prefix = $genome.ensembl_prefix
 ensembl_version = $genome.ensembl_version
@@ -210,13 +209,6 @@
 data_directory       = $(source_directory)/data
 #end raw
 
-#raw
-# Bowtie parameters
-bowtie_threads                              = 1
-bowtie_quals                                = --phred33-quals
-max_insert_size                             = 500
-#end raw
-
 # Parameters for building the dataset
 chromosomes = $genome.chromosomes
 mt_chromosome = $genome.mt_chromosome
@@ -225,42 +217,12 @@
 rrna_gene_sources = $genome.rrna_gene_sources
 
 #raw
-# Blat sequences per job
-num_blat_sequences                          = 10000
-
-# Minimum gene fusion range
-dna_concordant_length                       = 2000
-
-# Trim length for discordant reads (split reads are not trimmed)
-discord_read_trim                           = 50
-
-# Calculate extra annotations, fusion splice index and interrupted index
-calculate_extra_annotations                 = no
-
-# Filtering parameters
-clustering_precision                        = 0.95
-span_count_threshold                        = 5
-percent_identity_threshold                  = 0.90
-split_min_anchor                            = 4
-splice_bias                                 = 10
-positive_controls                           = $(data_directory)/controls.txt
-probability_threshold                       = 0.50
-
-# Position density when calculating covariance
-covariance_sampling_density                 = 0.01
-
-# Number of reads for each job in split
-reads_per_job                               = 1000000
-
-# If you have command line 'mail' and wish to be notified
-mailto                                      = andrew.mcpherson@gmail.com
-
 # Remove temp files
 remove_job_files                            = yes
 remove_job_temp_files                       = yes
 #end raw
   </configfile>
-  <configfile name="shscript">
+  <configfile name="defuse_script">
 #!/bin/bash
 ## define some things for cheetah processing
 #set $amp = chr(38)
@@ -291,7 +253,7 @@
  <help>
 **DeFuse**
 
-DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion.  See the DeFuse_Version_0.6.1_ manual for details.
+DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion.  See the DeFuse_Version_0.6_ manual for details.
 
 DeFuse uses a Reference Dataset to search for gene fusions.  The Reference Dataset is generated from the following sources in DeFuse_Version_0.6_:
     - genome_fasta from Ensembl