annotate create_reference_dataset.xml @ 18:547d8db4673e

Update create_reference_dataset for non human genome builds
author Jim Johnson <jj@umn.edu>
date Sat, 15 Jun 2013 14:36:47 -0500
parents d975e466d443
children 1af6f32ff592
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
85693cb5339f Correct tool_id create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents: 12
diff changeset
1 <tool id="create_defuse_reference" name="Create DeFuse Reference" version="1.6.1">
12
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
2 <description>create a defuse reference from Ensembl and UCSC sources</description>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
3 <requirements>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
4 <requirement type="package" version="0.6.1">defuse</requirement>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
5 <requirement type="package" version="0.1.18">samtools</requirement>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
6 <requirement type="package" version="1.0.0">bowtie</requirement>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
7 <requirement type="package" version="2013-05-09">gmap</requirement>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
8 <requirement type="package" version="latest">kent</requirement>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
9 </requirements>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
10 <command interpreter="command"> /bin/bash $shscript </command>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
11 <inputs>
18
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
12 <conditional name="genome">
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
13 <param name="choice" type="select" label="Select a Genome Build">
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
14 <option value="GRCh37">Homo_sapiens GRCh37 hg19</option>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
15 <option value="NCBI36">Homo_sapiens NCBI36 hg18</option>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
16 <option value="GRCm38">Mus_musculus GRCm38 mm10</option>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
17 <option value="NCBIM37">Mus_musculus NCBIM37 mm9</option>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
18 <option value="Rnor_5.0">Rattus_norvegicus Rnor_5.0 rn5</option>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
19 <option value="user_specified">User specified</option>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
20 </param>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
21 <when value="GRCh37">
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
22 <param name="ensembl_organism" type="hidden" value="homo_sapiens"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
23 <param name="ensembl_prefix" type="hidden" value="Homo_sapiens"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
24 <param name="ensembl_genome_version" type="hidden" value="GRCh37"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
25 <param name="ensembl_version" type="hidden" value="71"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
26 <param name="ncbi_organism" type="hidden" value="Homo_sapiens"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
27 <param name="ncbi_prefix" type="hidden" value="Hs"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
28 <param name="ucsc_genome_version" type="hidden" value="hg19"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
29 <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
30 <param name="mt_chromosome" type="hidden" value="MT"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
31 <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
32 <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
33 <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
34 </when>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
35 <when value="NCBI36">
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
36 <param name="ensembl_organism" type="hidden" value="homo_sapiens"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
37 <param name="ensembl_prefix" type="hidden" value="Homo_sapiens"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
38 <param name="ensembl_genome_version" type="hidden" value="NCBI36"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
39 <param name="ensembl_version" type="hidden" value="54"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
40 <param name="ncbi_organism" type="hidden" value="Homo_sapiens"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
41 <param name="ncbi_prefix" type="hidden" value="Hs"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
42 <param name="ucsc_genome_version" type="hidden" value="hg18"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
43 <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
44 <param name="mt_chromosome" type="hidden" value="MT"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
45 <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
46 <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
47 <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
48 </when>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
49 <when value="GRCm38">
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
50 <param name="ensembl_organism" type="hidden" value="mus_musculus"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
51 <param name="ensembl_prefix" type="hidden" value="Mus_musculus"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
52 <param name="ensembl_genome_version" type="hidden" value="GRCm38"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
53 <param name="ensembl_version" type="hidden" value="71"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
54 <param name="ncbi_organism" type="hidden" value="Mus_musculus"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
55 <param name="ncbi_prefix" type="hidden" value="Mm"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
56 <param name="ucsc_genome_version" type="hidden" value="mm10"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
57 <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
58 <param name="mt_chromosome" type="hidden" value="MT"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
59 <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
60 <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
61 <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
62 </when>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
63 <when value="NCBIM37">
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
64 <param name="ensembl_organism" type="hidden" value="mus_musculus"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
65 <param name="ensembl_prefix" type="hidden" value="Mus_musculus"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
66 <param name="ensembl_genome_version" type="hidden" value="NCBIM37"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
67 <param name="ensembl_version" type="hidden" value="67"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
68 <param name="ncbi_organism" type="hidden" value="Mus_musculus"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
69 <param name="ncbi_prefix" type="hidden" value="Mm"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
70 <param name="ucsc_genome_version" type="hidden" value="mm9"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
71 <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
72 <param name="mt_chromosome" type="hidden" value="MT"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
73 <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
74 <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
75 <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
76 </when>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
77 <when value="Rnor_5.0">
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
78 <param name="ensembl_organism" type="hidden" value="rattus_norvegicus"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
79 <param name="ensembl_prefix" type="hidden" value="Rattus_norvegicus"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
80 <param name="ensembl_genome_version" type="hidden" value="Rnor_5.0"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
81 <param name="ensembl_version" type="hidden" value="71"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
82 <param name="ncbi_organism" type="hidden" value="Rattus_norvegicus"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
83 <param name="ncbi_prefix" type="hidden" value="Rn"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
84 <param name="ucsc_genome_version" type="hidden" value="rn5"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
85 <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,MT"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
86 <param name="mt_chromosome" type="hidden" value="MT"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
87 <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
88 <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
89 <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
90 </when>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
91 <when value="user_specified">
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
92 <param name="ensembl_organism" type="text" value="" label="Ensembl Organism Name" help="Examples: homo_sapiens, mus_musculus, rattus_norvegicus"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
93 <param name="ensembl_prefix" type="text" value="" label="Ensembl Organism prefix" help="Examples: Homo_sapiens, Mus_musculus, Rattus_norvegicus"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
94 <param name="ensembl_genome_version" type="text" value="" label="Ensembl Genome Version" help="Examples: GRCh37, GRCm38, Rnor_5.0"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
95 <param name="ensembl_version" type="integer" value="" label="Ensembl Release Version" help="Example: 71"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
96 <param name="ncbi_organism" type="text" value="" label="NCBI Organism Name" help="Examples: Homo_sapiens, Mus_musculus, Rattus_norvegicus"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
97 <param name="ncbi_prefix" type="text" value="" label="NCBI Organism Unigene prefix" help="Examples: Hs, Mm, Rn"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
98 <param name="ucsc_genome_version" type="text" value="" label="UCSC Genome Version" help="Examples: hg19, mm10, rn5"/>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
99 <param name="chromosomes" type="text" value="" label="Chromosomes for Ensembl genome build" >
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
100 <help> Examples:
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
101 Homo_sapiens: 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
102 Mus_musculus: 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,Y,MT
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
103 Rattus_norvegicus: 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X,MT
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
104 ( ftp://ftp.ensembl.org/pub/release-71/fasta/homo_sapiens/dna/ )
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
105 </help>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
106 </param>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
107 <param name="mt_chromosome" type="text" value="MT" label="Ensembl Mitochondrial Chromosome name" help="Example: MT" />
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
108 <param name="gene_sources" type="text" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding" label="Gene sources" />
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
109 <param name="ig_gene_sources" type="text" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene" label="IG Gene sources" />
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
110 <param name="rrna_gene_sources" type="text" value="Mt_rRNA,rRNA,rRNA_pseudogene" label="Ribosomal Gene sources" />
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
111 </when>
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
112 </conditional>
12
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
113 </inputs>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
114 <outputs>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
115 <data format="txt" name="config_txt" label="${tool.name} on ${on_string}: config.txt"/>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
116 </outputs>
14
d975e466d443 Add stdio tag to create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents: 13
diff changeset
117 <stdio>
d975e466d443 Add stdio tag to create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents: 13
diff changeset
118 <exit_code range="1:" level="fatal" description="Error running Create DeFuse Reference" />
d975e466d443 Add stdio tag to create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents: 13
diff changeset
119 <regex match="Error:"
d975e466d443 Add stdio tag to create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents: 13
diff changeset
120 source="both"
d975e466d443 Add stdio tag to create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents: 13
diff changeset
121 level="fatal"
d975e466d443 Add stdio tag to create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents: 13
diff changeset
122 description="Error running Create DeFuse Reference" />
d975e466d443 Add stdio tag to create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents: 13
diff changeset
123
d975e466d443 Add stdio tag to create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents: 13
diff changeset
124 </stdio>
12
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
125 <configfiles>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
126 <configfile name="defuse_config">
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
127 #import ast
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
128 #
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
129 # Configuration file for defuse
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
130 #
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
131 # At a minimum, change all values enclosed by []
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
132 #
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
133
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
134 # Directory where the defuse code was unpacked
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
135 ## Default location in the tool/defuse directory
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
136 # source_directory = ${__root_dir__}/tools/defuse
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
137 source_directory = __DEFUSE_PATH__
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
138
18
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
139 ensembl_organism = $genome.ensembl_organism
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
140 ensembl_prefix = $genome.ensembl_prefix
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
141 ensembl_version = $genome.ensembl_version
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
142 ensembl_genome_version = $genome.ensembl_genome_version
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
143 ucsc_genome_version = $genome.ucsc_genome_version
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
144 ncbi_organism = $genome.ncbi_organism
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
145 ncbi_prefix = $genome.ncbi_prefix
12
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
146
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
147 # Directory where you want your dataset
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
148 dataset_directory = $config_txt.extra_files_path
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
149
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
150 #raw
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
151 # Input genome and gene models
18
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
152 gene_models = $(dataset_directory)/$(ensembl_prefix).$(ensembl_genome_version).$(ensembl_version).gtf
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
153 genome_fasta = $(dataset_directory)/$(ensembl_prefix).$(ensembl_genome_version).$(ensembl_version).dna.chromosomes.fa
12
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
154
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
155 # Repeat table from ucsc genome browser
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
156 repeats_filename = $(dataset_directory)/repeats.txt
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
157
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
158 # EST info downloaded from ucsc genome browser
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
159 est_fasta = $(dataset_directory)/est.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
160 est_alignments = $(dataset_directory)/intronEst.txt
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
161
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
162 # Unigene clusters downloaded from ncbi
18
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
163 unigene_fasta = $(dataset_directory)/$(ncbi_prefix).seq.uniq
12
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
164 #end raw
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
165
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
166 # Paths to external tools
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
167 samtools_bin = __SAMTOOLS_BIN__
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
168 bowtie_bin = __BOWTIE_BIN__
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
169 bowtie_build_bin = __BOWTIE_BUILD_BIN__
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
170 blat_bin = __BLAT_BIN__
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
171 fatotwobit_bin = __FATOTWOBIT_BIN__
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
172 gmap_bin = __GMAP_BIN__
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
173 gmap_setup_bin = __GMAP_SETUP_BIN__
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
174 r_bin = __R_BIN__
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
175 rscript_bin = __RSCRIPT_BIN__
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
176
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
177 #raw
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
178 # Directory for the gmap index (inside the dataset directory)
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
179 gmap_index_directory = $(dataset_directory)/gmap
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
180 #end raw
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
181
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
182 #raw
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
183 # Dataset files
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
184 dataset_prefix = $(dataset_directory)/defuse
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
185 chromosome_prefix = $(dataset_prefix).dna.chromosomes
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
186 exons_fasta = $(dataset_prefix).exons.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
187 cds_fasta = $(dataset_prefix).cds.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
188 cdna_regions = $(dataset_prefix).cdna.regions
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
189 cdna_fasta = $(dataset_prefix).cdna.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
190 reference_fasta = $(dataset_prefix).reference.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
191 rrna_fasta = $(dataset_prefix).rrna.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
192 ig_gene_list = $(dataset_prefix).ig.gene.list
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
193 repeats_regions = $(dataset_directory)/repeats.regions
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
194 est_split_fasta1 = $(dataset_directory)/est.1.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
195 est_split_fasta2 = $(dataset_directory)/est.2.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
196 est_split_fasta3 = $(dataset_directory)/est.3.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
197 est_split_fasta4 = $(dataset_directory)/est.4.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
198 est_split_fasta5 = $(dataset_directory)/est.5.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
199 est_split_fasta6 = $(dataset_directory)/est.6.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
200 est_split_fasta7 = $(dataset_directory)/est.7.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
201 est_split_fasta8 = $(dataset_directory)/est.8.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
202 est_split_fasta9 = $(dataset_directory)/est.9.fa
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
203
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
204 # Fasta files with bowtie indices for prefiltering reads for concordantly mapping pairs
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
205 prefilter1 = $(unigene_fasta)
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
206
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
207 # deFuse scripts and tools
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
208 scripts_directory = $(source_directory)/scripts
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
209 tools_directory = $(source_directory)/tools
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
210 data_directory = $(source_directory)/data
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
211 #end raw
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
212
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
213 #raw
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
214 # Bowtie parameters
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
215 bowtie_threads = 1
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
216 bowtie_quals = --phred33-quals
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
217 max_insert_size = 500
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
218 #end raw
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
219
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
220 # Parameters for building the dataset
18
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
221 chromosomes = $genome.chromosomes
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
222 mt_chromosome = $genome.mt_chromosome
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
223 gene_sources = $genome.gene_sources
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
224 ig_gene_sources = $genome.ig_gene_sources
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
225 rrna_gene_sources = $genome.rrna_gene_sources
12
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
226
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
227 #raw
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
228 # Blat sequences per job
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
229 num_blat_sequences = 10000
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
230
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
231 # Minimum gene fusion range
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
232 dna_concordant_length = 2000
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
233
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
234 # Trim length for discordant reads (split reads are not trimmed)
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
235 discord_read_trim = 50
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
236
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
237 # Calculate extra annotations, fusion splice index and interrupted index
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
238 calculate_extra_annotations = no
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
239
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
240 # Filtering parameters
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
241 clustering_precision = 0.95
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
242 span_count_threshold = 5
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
243 percent_identity_threshold = 0.90
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
244 split_min_anchor = 4
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
245 splice_bias = 10
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
246 positive_controls = $(data_directory)/controls.txt
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
247 probability_threshold = 0.50
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
248
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
249 # Position density when calculating covariance
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
250 covariance_sampling_density = 0.01
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
251
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
252 # Number of reads for each job in split
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
253 reads_per_job = 1000000
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
254
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
255 # If you have command line 'mail' and wish to be notified
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
256 mailto = andrew.mcpherson@gmail.com
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
257
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
258 # Remove temp files
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
259 remove_job_files = yes
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
260 remove_job_temp_files = yes
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
261 #end raw
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
262 </configfile>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
263 <configfile name="shscript">
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
264 #!/bin/bash
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
265 ## define some things for cheetah processing
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
266 #set $amp = chr(38)
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
267 #set $gt = chr(62)
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
268 ## substitute pathnames into config file
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
269 if `grep __DEFUSE_PATH__ $defuse_config ${gt} /dev/null`;then sed -i'.tmp' "s#__DEFUSE_PATH__#\${DEFUSE_PATH}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
270 if `grep __SAMTOOLS_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} SAMTOOLS_BIN=`which samtools`;then sed -i'.tmp' "s#__SAMTOOLS_BIN__#\${SAMTOOLS_BIN}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
271 if `grep __BOWTIE_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BIN=`which bowtie`;then sed -i'.tmp' "s#__BOWTIE_BIN__#\${BOWTIE_BIN}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
272 if `grep __BOWTIE_BUILD_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BOWTIE_BUILD_BIN=`which bowtie-build`;then sed -i'.tmp' "s#__BOWTIE_BUILD_BIN__#\${BOWTIE_BUILD_BIN}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
273 if `grep __BLAT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} BLAT_BIN=`which blat`;then sed -i'.tmp' "s#__BLAT_BIN__#\${BLAT_BIN}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
274 if `grep __FATOTWOBIT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} FATOTWOBIT_BIN=`which faToTwoBit`;then sed -i'.tmp' "s#__FATOTWOBIT_BIN__#\${FATOTWOBIT_BIN}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
275 if `grep __GMAP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_BIN=`which gmap`;then sed -i'.tmp' "s#__GMAP_BIN__#\${GMAP_BIN}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
276 if `grep __GMAP_SETUP_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_SETUP_BIN=`which gmap_setup`;then sed -i'.tmp' "s#__GMAP_SETUP_BIN__#\${GMAP_SETUP_BIN}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
277 if `grep __GMAP_INDEX_DIR__ $defuse_config ${gt} /dev/null` ${amp}${amp} GMAP_INDEX_DIR=`pwd`/gmap;then sed -i'.tmp' "s#__GMAP_INDEX_DIR__#\${GMAP_INDEX_DIR}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
278 if `grep __R_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} R_BIN=`which R`;then sed -i'.tmp' "s#__R_BIN__#\${R_BIN}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
279 if `grep __RSCRIPT_BIN__ $defuse_config ${gt} /dev/null` ${amp}${amp} RSCRIPT_BIN=`which Rscript`;then sed -i'.tmp' "s#__RSCRIPT_BIN__#\${RSCRIPT_BIN}#" $defuse_config; fi
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
280 ## copy config to output
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
281 cp $defuse_config $config_txt
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
282 ## make the extra_files directory for the config output dataset
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
283 mkdir -p $config_txt.extra_files_path
18
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
284 ## create_reference_dataset.pl
12
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
285 perl \${DEFUSE_PATH}/scripts/create_reference_dataset.pl -c $defuse_config
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
286 </configfile>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
287 </configfiles>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
288
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
289 <tests>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
290 </tests>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
291 <help>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
292 **DeFuse**
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
293
18
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
294 DeFuse_ is a software package for gene fusion discovery using RNA-Seq data. The software uses clusters of discordant paired end alignments to inform a split read alignment analysis for finding fusion boundaries. The software also employs a number of heuristic filters in an attempt to reduce the number of false positives and produces a fully annotated output for each predicted fusion. See the DeFuse_Version_0.6_ manual for details.
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
295
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
296 DeFuse uses a Reference Dataset to search for gene fusions. The Reference Dataset is generated from the following sources in DeFuse_Version_0.6_:
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
297 - genome_fasta from Ensembl
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
298 - gene_models from Ensembl
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
299 - repeats_filename from UCSC RepeatMasker rmsk.txt
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
300 - est_fasta from UCSC
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
301 - est_alignments from UCSC intronEst.txt
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
302 - unigene_fasta from NCBI
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
303
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
304 The create_defuse_reference Galaxy tool downloads the reference genome and other source files, and builds any derivative files including bowtie indices, gmap indices, and 2bit files. Expect this step to take at least 12 hours.
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
305
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
306
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
307 It will generate a config.txt file that can be input into the deFuse Galaxy tool.
12
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
308
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
309 Journal reference: http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1001138
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
310
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
311 .. _DeFuse: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=Main_Page
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
312
18
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
313 .. _DeFuse_Version_0.6: http://sourceforge.net/apps/mediawiki/defuse/index.php?title=DeFuse_Version_0.6.1
12
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
314
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
315 ------
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
316
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
317 **Outputs**
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
318
18
547d8db4673e Update create_reference_dataset for non human genome builds
Jim Johnson <jj@umn.edu>
parents: 14
diff changeset
319 The Galaxy history will contain the config.txt file, which provides DeFuse with the reference data paths.
12
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
320
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
321 </help>
33e2235bf003 Add create_reference_dataset.xml
Jim Johnson <jj@umn.edu>
parents:
diff changeset
322 </tool>