annotate create_reference_dataset.xml @ 14:d975e466d443

Add stdio tag to create_reference_dataset.xml
author Jim Johnson <jj@umn.edu>
date Mon, 10 Jun 2013 05:43:53 -0500
parents 85693cb5339f
children 547d8db4673e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
85693cb5339f Correct tool_id create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents: 12
diff changeset
1 <tool id="create_defuse_reference" name="Create DeFuse Reference" version="1.6.1">
12
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
2 <description>create a defuse reference from Ensembl and UCSC sources</description>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
3 <requirements>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
4 <requirement type="package" version="0.6.1">defuse</requirement>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
5 <requirement type="package" version="0.1.18">samtools</requirement>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
6 <requirement type="package" version="1.0.0">bowtie</requirement>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
7 <requirement type="package" version="2013-05-09">gmap</requirement>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
8 <requirement type="package" version="latest">kent</requirement>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
9 </requirements>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
10 <command interpreter="command"> /bin/bash $shscript </command>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
11 <inputs>
14
d975e466d443 Add stdio tag to create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents: 13
diff changeset
12 <param name="ensembl_genome_version" type="text" value="" label="Ensembl Genome Version" help="Example: GRCh37"/>
12
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
13 <param name="ensembl_version" type="integer" value="" label="Ensembl Release Version" help="Example: 71"/>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
14 <param name="ucsc_genome_version" type="text" value="" label="UCSC Genome Version" help="Example: hg19"/>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
15 <param name="chromosomes" type="text" value="" label="Chromosomes" help="Example: 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT"/>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
16 <param name="mt_chromosome" type="text" value="MT" label="Mitochondrial Chromosome" />
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
17 <param name="gene_sources" type="text" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding" label="Gene sources" />
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
18 <param name="ig_gene_sources" type="text" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene" label="IG Gene sources" />
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
19 <param name="rrna_gene_sources" type="text" value="Mt_rRNA,rRNA,rRNA_pseudogene" label="Ribosomal Gene sources" />
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
20 </inputs>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
21 <outputs>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
22 <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
23 </outputs>
14
d975e466d443 Add stdio tag to create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents: 13
diff changeset
24 <stdio>
d975e466d443 Add stdio tag to create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents: 13
diff changeset
25 <exit_code range="1:" level="fatal" description="Error running Create DeFuse Reference" />
d975e466d443 Add stdio tag to create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents: 13
diff changeset
26 <regex match="Error:"
d975e466d443 Add stdio tag to create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents: 13
diff changeset
27 source="both"
d975e466d443 Add stdio tag to create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents: 13
diff changeset
28 level="fatal"
d975e466d443 Add stdio tag to create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents: 13
diff changeset
29 description="Error running Create DeFuse Reference" />
d975e466d443 Add stdio tag to create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents: 13
diff changeset
30
d975e466d443 Add stdio tag to create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents: 13
diff changeset
31 </stdio>
12
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
32 <configfiles>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
33 <configfile name="defuse_config">
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
34 #import ast
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
35 #
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
36 # Configuration file for defuse
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
37 #
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
38 # At a minimum, change all values enclosed by []
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
39 #
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
40
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
41 # Directory where the defuse code was unpacked
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
42 ## Default location in the tool/defuse directory
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
43 # source_directory = ${__root_dir__}/tools/defuse
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
44 source_directory = __DEFUSE_PATH__
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
45
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
46 ensembl_version = $ensembl_version
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
47 ensembl_genome_version = $ensembl_genome_version
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
48 ucsc_genome_version = $ucsc_genome_version
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
49
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
50 # Directory where you want your dataset
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
51 dataset_directory = $config_txt.extra_files_path
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
52
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
53 #raw
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
54 # Input genome and gene models
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
55 gene_models = $(dataset_directory)/Homo_sapiens.$(ensembl_genome_version).$(ensembl_version).gtf
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
56 genome_fasta = $(dataset_directory)/Homo_sapiens.$(ensembl_genome_version).$(ensembl_version).dna.chromosomes.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
57
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
58 # Repeat table from ucsc genome browser
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
59 repeats_filename = $(dataset_directory)/repeats.txt
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
60
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
61 # EST info downloaded from ucsc genome browser
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
62 est_fasta = $(dataset_directory)/est.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
63 est_alignments = $(dataset_directory)/intronEst.txt
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
64
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
65 # Unigene clusters downloaded from ncbi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
66 unigene_fasta = $(dataset_directory)/Hs.seq.uniq
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
67 #end raw
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
68
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
69 # Paths to external tools
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
70 samtools_bin = __SAMTOOLS_BIN__
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
71 bowtie_bin = __BOWTIE_BIN__
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
72 bowtie_build_bin = __BOWTIE_BUILD_BIN__
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
73 blat_bin = __BLAT_BIN__
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
74 fatotwobit_bin = __FATOTWOBIT_BIN__
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
75 gmap_bin = __GMAP_BIN__
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
76 gmap_setup_bin = __GMAP_SETUP_BIN__
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
77 r_bin = __R_BIN__
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
78 rscript_bin = __RSCRIPT_BIN__
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
79
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
80 #raw
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
81 # Directory where the gmap index is stored
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
82 gmap_index_directory = $(dataset_directory)/gmap
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
83 #end raw
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
84
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
85 #raw
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
86 # Dataset files
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
87 dataset_prefix = $(dataset_directory)/defuse
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
88 chromosome_prefix = $(dataset_prefix).dna.chromosomes
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
89 exons_fasta = $(dataset_prefix).exons.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
90 cds_fasta = $(dataset_prefix).cds.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
91 cdna_regions = $(dataset_prefix).cdna.regions
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
92 cdna_fasta = $(dataset_prefix).cdna.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
93 reference_fasta = $(dataset_prefix).reference.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
94 rrna_fasta = $(dataset_prefix).rrna.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
95 ig_gene_list = $(dataset_prefix).ig.gene.list
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
96 repeats_regions = $(dataset_directory)/repeats.regions
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
97 est_split_fasta1 = $(dataset_directory)/est.1.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
98 est_split_fasta2 = $(dataset_directory)/est.2.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
99 est_split_fasta3 = $(dataset_directory)/est.3.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
100 est_split_fasta4 = $(dataset_directory)/est.4.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
101 est_split_fasta5 = $(dataset_directory)/est.5.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
102 est_split_fasta6 = $(dataset_directory)/est.6.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
103 est_split_fasta7 = $(dataset_directory)/est.7.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
104 est_split_fasta8 = $(dataset_directory)/est.8.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
105 est_split_fasta9 = $(dataset_directory)/est.9.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
106
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
107 # Fasta files with bowtie indices for prefiltering reads for concordantly mapping pairs
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
108 prefilter1 = $(unigene_fasta)
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
109
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
110 # deFuse scripts and tools
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
111 scripts_directory = $(source_directory)/scripts
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
112 tools_directory = $(source_directory)/tools
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
113 data_directory = $(source_directory)/data
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
114 #end raw
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
115
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
116 #raw
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
117 # Bowtie parameters
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
118 bowtie_threads = 1
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
119 bowtie_quals = --phred33-quals
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
120 max_insert_size = 500
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
121 #end raw
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
122
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
123 # Parameters for building the dataset
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
124 chromosomes = $chromosomes
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
125 mt_chromosome = $mt_chromosome
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
126 gene_sources = $gene_sources
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
127 ig_gene_sources = $ig_gene_sources
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
128 rrna_gene_sources = $rrna_gene_sources
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
129
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
130 #raw
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
131 # Blat sequences per job
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
132 num_blat_sequences = 10000
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
133
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
134 # Minimum gene fusion range
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
135 dna_concordant_length = 2000
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
136
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
137 # Trim length for discordant reads (split reads are not trimmed)
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
138 discord_read_trim = 50
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
139
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
140 # Calculate extra annotations, fusion splice index and interrupted index
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
141 calculate_extra_annotations = no
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
142
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
143 # Filtering parameters
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
144 clustering_precision = 0.95
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
145 span_count_threshold = 5
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
146 percent_identity_threshold = 0.90
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
147 split_min_anchor = 4
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
148 splice_bias = 10
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
149 positive_controls = $(data_directory)/controls.txt
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
150 probability_threshold = 0.50
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
151
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
152 # Position density when calculating covariance
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
153 covariance_sampling_density = 0.01
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
154
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
155 # Number of reads for each job in split
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
156 reads_per_job = 1000000
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
157
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
158 # If you have command line 'mail' and wish to be notified
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
159 mailto = andrew.mcpherson@gmail.com
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
160
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
161 # Remove temp files
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
162 remove_job_files = yes
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
163 remove_job_temp_files = yes
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
164 #end raw
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
165 </configfile>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
166 <configfile name="shscript">
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
167 #!/bin/bash
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
168 ## define some things for cheetah processing
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
169 #set $ds = chr(36)
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
170 #set $amp = chr(38)
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
171 #set $gt = chr(62)
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
172 #set $lt = chr(60)
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
173 #set $echo_cmd = 'echo'
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
174 ## Find the defuse.pl in the galaxy tool path
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
175 #import Cheetah.FileUtils
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
176 ## substitute pathnames into config file
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
177 if `grep __DEFUSE_PATH__ $defuse_config ${gt} /dev/null`;then sed -i'.tmp' "s#__DEFUSE_PATH__#\${DEFUSE_PATH}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
178 if `grep __SAMTOOLS_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} SAMTOOLS_BIN=`which samtools`;then sed -i'.tmp' "s#__SAMTOOLS_BIN__#\${SAMTOOLS_BIN}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
179 if `grep __BOWTIE_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BIN=`which bowtie`;then sed -i'.tmp' "s#__BOWTIE_BIN__#\${BOWTIE_BIN}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
180 if `grep __BOWTIE_BUILD_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BUILD_BIN=`which bowtie-build`;then sed -i'.tmp' "s#__BOWTIE_BUILD_BIN__#\${BOWTIE_BUILD_BIN}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
181 if `grep __BLAT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BLAT_BIN=`which blat`;then sed -i'.tmp' "s#__BLAT_BIN__#\${BLAT_BIN}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
182 if `grep __FATOTWOBIT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} FATOTWOBIT_BIN=`which faToTwoBit`;then sed -i'.tmp' "s#__FATOTWOBIT_BIN__#\${FATOTWOBIT_BIN}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
183 if `grep __GMAP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_BIN=`which gmap`;then sed -i'.tmp' "s#__GMAP_BIN__#\${GMAP_BIN}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
184 if `grep __GMAP_SETUP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_SETUP_BIN=`which gmap_setup`;then sed -i'.tmp' "s#__GMAP_SETUP_BIN__#\${GMAP_SETUP_BIN}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
185 if `grep __GMAP_INDEX_DIR__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_INDEX_DIR=`pwd`/gmap;then sed -i'.tmp' "s#__GMAP_INDEX_DIR__#\${GMAP_INDEX_DIR}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
186 if `grep __R_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} R_BIN=`which R`;then sed -i'.tmp' "s#__R_BIN__#\${R_BIN}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
187 if `grep __RSCRIPT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} RSCRIPT_BIN=`which Rscript`;then sed -i'.tmp' "s#__RSCRIPT_BIN__#\${RSCRIPT_BIN}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
188
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
189 ## copy config to output
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
190 cp $defuse_config $config_txt
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
191 ## create the dataset directory (the output's extra_files_path) used as dataset_directory in the config
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
192 mkdir -p $config_txt.extra_files_path
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
193 ## run create_reference_dataset.pl
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
194 perl \${DEFUSE_PATH}/scripts/create_reference_dataset.pl -c $defuse_config
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
195 </configfile>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
196 </configfiles>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
197
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
198 <tests>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
199 </tests>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
200 <help>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
201 **DeFuse**
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
202
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
203 DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion.
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
204
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
205 Journal reference: http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1001138
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
206
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
207 .. _DeFuse: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
208
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
209 ------
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
210
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
211 **Inputs**
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
212
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
213 DeFuse requires 2 fastq files for paired reads, one with the left mate of the paired reads, and a second fastq with the right mate of the paired reads (**with reads in the same order as in the first fastq dataset**).
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
214
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
215 If your fastq files have reads in different orders or include unpaired reads, you can preprocess them with **FASTQ interlacer** to create a single interlaced fastq dataset with only the paired reads and input that to **FASTQ de-interlacer** to separate the reads into a left fastq and right fastq.
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
216
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
217 DeFuse uses a Reference Dataset to search for gene fusions. The Reference Dataset is generated from the following sources in DeFuse_Version_0.4_:
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
218 - genome_fasta from Ensembl
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
219 - gene_models from Ensembl
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
220 - repeats_filename from UCSC RepeatMasker rmsk.txt
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
221 - est_fasta from UCSC
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
222 - est_alignments from UCSC intronEst.txt
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
223 - unigene_fasta from NCBI
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
224
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
225 .. _DeFuse_Version_0.4: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.4.2
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
226
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
227 ------
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
228
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
229 **Outputs**
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
230
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
231 The Galaxy history will contain 5 outputs: the config.txt file that provides DeFuse with its parameters, the defuse.log which details what DeFuse has done and can be useful in determining any errors, and the 3 results files that DeFuse generates.
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
232
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
233 DeFuse generates 3 results files: results.txt, results.filtered.txt, and results.classify.txt. All three files have the same format, though results.classify.txt has a probability column from the application of the classifier to results.txt, and results.filtered.txt has been filtered according to the threshold probability as set in config.txt.
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
234
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
235 The file format is tab delimited with one prediction per line, and the following fields per prediction (not necessarily in this order):
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
236
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
237 - **Identification**
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
238 - cluster_id : random identifier assigned to each prediction
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
239 - library_name : library name given on the command line of defuse
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
240 - gene1 : ensembl id of gene 1
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
241 - gene2 : ensembl id of gene 2
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
242 - gene_name1 : name of gene 1
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
243 - gene_name2 : name of gene 2
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
244 - **Evidence**
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
245 - break_predict : breakpoint prediction method, denovo or splitr, that is considered most reliable
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
246 - concordant_ratio : proportion of spanning reads considered concordant by blat
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
247 - denovo_min_count : minimum kmer count across denovo assembled sequence
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
248 - denovo_sequence : fusion sequence predicted by debruijn based denovo sequence assembly
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
249 - denovo_span_pvalue : p-value, lower values are evidence the prediction is a false positive
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
250 - gene_align_strand1 : alignment strand for spanning read alignments to gene 1
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
251 - gene_align_strand2 : alignment strand for spanning read alignments to gene 2
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
252 - min_map_count : minimum of the number of genomic mappings for each spanning read
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
253 - max_map_count : maximum of the number of genomic mappings for each spanning read
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
254 - mean_map_count : average of the number of genomic mappings for each spanning read
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
255 - num_multi_map : number of spanning reads that map to more than one genomic location
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
256 - span_count : number of spanning reads supporting the fusion
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
257 - span_coverage1 : coverage of spanning reads aligned to gene 1 as a proportion of expected coverage
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
258 - span_coverage2 : coverage of spanning reads aligned to gene 2 as a proportion of expected coverage
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
259 - span_coverage_min : minimum of span_coverage1 and span_coverage2
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
260 - span_coverage_max : maximum of span_coverage1 and span_coverage2
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
261 - splitr_count : number of split reads supporting the prediction
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
262 - splitr_min_pvalue : p-value, lower values are evidence the prediction is a false positive
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
263 - splitr_pos_pvalue : p-value, lower values are evidence the prediction is a false positive
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
264 - splitr_sequence : fusion sequence predicted by split reads
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
265 - splitr_span_pvalue : p-value, lower values are evidence the prediction is a false positive
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
266 - **Annotation**
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
267 - adjacent : fusion between adjacent genes
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
268 - altsplice : fusion likely the product of alternative splicing between adjacent genes
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
269 - break_adj_entropy1 : di-nucleotide entropy of the 40 nucleotides adjacent to the fusion splice in gene 1
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
270 - break_adj_entropy2 : di-nucleotide entropy of the 40 nucleotides adjacent to the fusion splice in gene 2
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
271 - break_adj_entropy_min : minimum of break_adj_entropy1 and break_adj_entropy2
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
272 - breakpoint_homology : number of nucleotides at the fusion splice that align equally well to gene 1 or gene 2
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
273 - breakseqs_estislands_percident : maximum percent identity of fusion sequence alignments to est islands
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
274 - cdna_breakseqs_percident : maximum percent identity of fusion sequence alignments to cdna
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
275 - deletion : fusion produced by a genomic deletion
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
276 - est_breakseqs_percident : maximum percent identity of fusion sequence alignments to est
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
277 - eversion : fusion produced by a genomic eversion
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
278 - exonboundaries : fusion splice at exon boundaries
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
279 - expression1 : expression of gene 1 as number of concordant pairs aligned to exons
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
280 - expression2 : expression of gene 2 as number of concordant pairs aligned to exons
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
281 - gene_chromosome1 : chromosome of gene 1
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
282 - gene_chromosome2 : chromosome of gene 2
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
283 - gene_end1 : end position for gene 1
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
284 - gene_end2 : end position for gene 2
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
285 - gene_location1 : location of breakpoint in gene 1
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
286 - gene_location2 : location of breakpoint in gene 2
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
287 - gene_start1 : start of gene 1
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
288 - gene_start2 : start of gene 2
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
289 - gene_strand1 : strand of gene 1
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
290 - gene_strand2 : strand of gene 2
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
291 - genome_breakseqs_percident : maximum percent identity of fusion sequence alignments to genome
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
292 - genomic_break_pos1 : genomic position in gene 1 of fusion splice / breakpoint
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
293 - genomic_break_pos2 : genomic position in gene 2 of fusion splice / breakpoint
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
294 - genomic_strand1 : genomic strand in gene 1 of fusion splice / breakpoint, retained sequence upstream on this strand, breakpoint is downstream
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
295 - genomic_strand2 : genomic strand in gene 2 of fusion splice / breakpoint, retained sequence upstream on this strand, breakpoint is downstream
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
296 - interchromosomal : fusion produced by an interchromosomal translocation
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
297 - interrupted_index1 : ratio of coverage before and after the fusion splice / breakpoint in gene 1
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
298 - interrupted_index2 : ratio of coverage before and after the fusion splice / breakpoint in gene 2
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
299 - inversion : fusion produced by genomic inversion
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
300 - orf : fusion combines genes in a way that preserves a reading frame
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
301 - probability : probability produced by classification using adaboost and example positives/negatives (only given in results.classify.txt)
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
302 - read_through : fusion involving adjacent genes potentially resulting from co-transcription rather than genome rearrangement
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
303 - repeat_proportion1 : proportion of the spanning reads in gene 1 that span a repeat region
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
304 - repeat_proportion2 : proportion of the spanning reads in gene 2 that span a repeat region
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
305 - max_repeat_proportion : max of repeat_proportion1 and repeat_proportion2
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
306 - splice_score : number of nucleotides similar to GTAG at fusion splice
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
307 - num_splice_variants : number of potential splice variants for this gene pair
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
308 - splicing_index1 : number of concordant pairs in gene 1 spanning the fusion splice / breakpoint, divided by number of spanning reads supporting the fusion with gene 2
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
309 - splicing_index2 : number of concordant pairs in gene 2 spanning the fusion splice / breakpoint, divided by number of spanning reads supporting the fusion with gene 1
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
310
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
311
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
312 **Example**
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
313
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
314 results.tsv::
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
315
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
316 cluster_id splitr_sequence splitr_count splitr_span_pvalue splitr_pos_pvalue splitr_min_pvalue adjacent altsplice break_adj_entropy1 break_adj_entropy2 break_adj_entropy_min break_predict breakpoint_homology breakseqs_estislands_percident cdna_breakseqs_percident concordant_ratio deletion est_breakseqs_percident eversion exonboundaries expression1 expression2 gene1 gene2 gene_align_strand1 gene_align_strand2 gene_chromosome1 gene_chromosome2 gene_end1 gene_end2 gene_location1 gene_location2 gene_name1 gene_name2 gene_start1 gene_start2 gene_strand1 gene_strand2 genome_breakseqs_percident genomic_break_pos1 genomic_break_pos2 genomic_strand1 genomic_strand2 interchromosomal interrupted_index1 interrupted_index2 inversion library_name max_map_count max_repeat_proportion mean_map_count min_map_count num_multi_map num_splice_variants orf read_through repeat_proportion1 repeat_proportion2 span_count span_coverage1 span_coverage2 span_coverage_max span_coverage_min splice_score splicing_index1 splicing_index2
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
317 1169 GCTTACTGTATGCCAGGCCCCAGAGGGGCAACCACCCTCTAAAGAGAGCGGCTCCTGCCTCCCAGAAAGCTCACAGACTGTGGGAGGGAAACAGGCAGCAGGTGAAGATGCCAAATGCCAGGATATCTGCCCTGTCCTTGCTTGATGCAGCTGCTGGCTCCCACGTTCTCCCCAGAATCCCCTCACACTCCTGCTGTTTTCTCTGCAGGTTGGCAGAGCCCCATGAGGGCAGGGCAGCCACTTTGTTCTTGGGCGGCAAACCTCCCTGGGCGGCACGGAAACCACGGTGAGAAGGGGGCAGGTCGGGCACGTGCAGGGACCACGCTGCAGG|TGTACCCAACAGCTCCGAAGAGACAGCGACCATCGAGAACGGGCCATGATGACGATGGCGGTTTTGTCGAAAAGAAAAGGGGGAAATGTGGGGAAAAGCAAGAGAGATCAGATTGTTACTGTGTCTGTGTAGAAAGAAGTAGACATGGGAGACTCCATTTTGTTCTGTACTAAGAAAAATTCTTCTGCCTTGAGATTCGGTGACCCCACCCCCAACCCCGTGCTCTCTGAAACATGTGCTGTGTCCACTCAGGGTTGAATGGATTAAGGGCGGTGCGAGACGTGCTTT 2 0.000436307890680442 0.110748295953850 0.0880671602973091 N Y 3.19872427442695 3.48337348351473 3.19872427442695 splitr 0 0 0 0 Y 0 N N 0 0 ENSG00000105549 ENSG00000213753 + - 19 19 376013 59111168 intron upstream THEG AC016629.2 361750 59084870 - + 0 375099 386594 + - N 8.34107429512245 - N output_dir 82 0.677852348993289 40.6666666666667 1 11 1 N N 0.361271676300578 0.677852348993289 12 0.758602776578432 0.569678713445872 0.758602776578432 0.569678713445872 2 0.416666666666667 -
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
318 3596 TGGGGGTTGAGGCTTCTGTTCCCAGGTTCCATGACCTCAGAGGTGGCTGGTGAGGTTATGACCTTTGCCCTCCAGCCCTGGCTTAAAACCTCAGCCCTAGGACCTGGTTAAAGGAAGGGGAGATGGAGCTTTGCCCCGACCCCCCCCCGTTCCCCTCACCTGTCAGCCCGAGCTGGGCCAGGGCCCCTAGGTGGGGAACTGGGCCGGGGGGCGGGCACAAGCGGAGGTGGTGCCCCCAAAAGGGCTCCCGGTGGGGTCTTGCTGAGAAGGTGAGGGGTTCCCGGGGCCGCAGCAGGTGGTGGTGGAGGAGCCAAGCGGCTGTAGAGCAAGGGGTGAGCAGGTTCCAGACCGTAGAGGCGGGCAGCGGCCACGGCCCCGGGTCCAGTTAGCTCCTCACCCGCCTCATAGAAGCGGGGTGGCCTTGCCAGGCGTGGGGGTGCTGCC|TTCCTTGGATGTGGTAGCCGTTTCTCAGGCTCCCTCTCCGGAATCGAACCCTGATTCCCCGTCACCCGTGGTCACCATGGTAGGCACGGCGACTACCATCGAAAGTTGATAGGGCAGACGTTCGAATGGGTCGTCGCCGCCACGGGGGGCGTGCGATCAGCCCGAGGTTATCTAGAGTCACCAAAGCCGCCGGCGCCCGCCCCCCGGCCGGGGCCGGAGAGGGGCTGACCGGGTTGGTTTTGATCTGATAAATGCACGCATCCCCCCCGCGAAGGGGGTCAGCGCCCGTCGGCATGTATTAGCTCTAGAATTACCACAGTTATCCAAGTAGGAGAGGAGCGAGCGACCAAAGGAACCATAACTGATTTAATGAGCCATTCGCAGTTTCACTGTACCGGCCGTGCGTACTTAGACATGCATGGCTTAATCTTTGAGACAAGCATATGCTACTGGCAGG 250 7.00711162298275e-72 0.00912124762512338 0.00684237452309549 N N 3.31745197152461 3.47233119514066 3.31745197152461 splitr 7 0.0157657657657656 0 0 N 0.0135135135135136 N N 0 0 ENSG00000156860 ENSG00000212932 - + 16 21 30682131 48111157 coding upstream FBRS RPL23AP4 30670289 48110676 + + 0.0157657657657656 30680678 9827473 - + Y - - N output_dir 2 1 1.11111111111111 1 1 1 N N 0 1 9 0.325530693397641 0.296465452915709 0.325530693397641 0.296465452915709 2 - -
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
319
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
320 </help>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
321 </tool>