changeset 41:74de1ccc51db

Add GRCh38 hg38, minor fixes.
author Jim Johnson <jj@umn.edu>
date Fri, 18 Dec 2015 14:37:33 -0600
parents ed07bcc39f6e
children bfb65cebe229
files create_reference_dataset.xml datamanager_create_reference.xml defuse.xml
diffstat 3 files changed, 42 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/create_reference_dataset.xml	Wed May 06 14:31:57 2015 -0500
+++ b/create_reference_dataset.xml	Fri Dec 18 14:37:33 2015 -0600
@@ -11,6 +11,7 @@
  <inputs>
   <conditional name="genome">
     <param name="choice" type="select" label="Select a Genome Build">
+      <option value="GRCh38">Homo_sapiens GRCh38  hg38</option>
       <option value="GRCh37">Homo_sapiens GRCh37  hg19</option>
       <option value="NCBI36">Homo_sapiens NCBI36 hg18</option>
       <option value="GRCm38">Mus_musculus GRCm38 mm10</option>
@@ -18,6 +19,20 @@
       <option value="Rnor_5.0">Rattus_norvegicus Rnor_5.0 rn5</option>
       <option value="user_specified">User specified</option>
     </param>
+    <when value="GRCh38">
+      <param name="ensembl_organism" type="hidden" value="homo_sapiens"/>
+      <param name="ensembl_prefix" type="hidden" value="Homo_sapiens"/>
+      <param name="ensembl_genome_version" type="hidden" value="GRCh38"/>
+      <param name="ensembl_version" type="hidden" value="80"/>
+      <param name="ncbi_organism" type="hidden" value="Homo_sapiens"/>
+      <param name="ncbi_prefix" type="hidden" value="Hs"/>
+      <param name="ucsc_genome_version" type="hidden" value="hg38"/>
+      <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT"/>
+      <param name="mt_chromosome" type="hidden" value="MT"/>
+      <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
+      <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
+      <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
+    </when>
     <when value="GRCh37">
       <param name="ensembl_organism" type="hidden" value="homo_sapiens"/>
       <param name="ensembl_prefix" type="hidden" value="Homo_sapiens"/>
@@ -220,6 +235,9 @@
 gene_sources = $genome.gene_sources
 ig_gene_sources = $genome.ig_gene_sources
 rrna_gene_sources = $genome.rrna_gene_sources
+gene_biotypes = $genome.gene_sources
+ig_gene_biotypes = $genome.ig_gene_sources
+rrna_gene_biotypes = $genome.rrna_gene_sources
 
 #raw
 # Remove temp files
--- a/datamanager_create_reference.xml	Wed May 06 14:31:57 2015 -0500
+++ b/datamanager_create_reference.xml	Fri Dec 18 14:37:33 2015 -0600
@@ -17,6 +17,7 @@
  <inputs>
   <conditional name="genome">
     <param name="choice" type="select" label="Select a Genome Build">
+      <option value="GRCh38">Homo_sapiens GRCh38  hg38</option>
       <option value="GRCh37">Homo_sapiens GRCh37  hg19</option>
       <option value="NCBI36">Homo_sapiens NCBI36 hg18</option>
       <option value="GRCm38">Mus_musculus GRCm38 mm10</option>
@@ -24,6 +25,20 @@
       <option value="Rnor_5.0">Rattus_norvegicus Rnor_5.0 rn5</option>
       <option value="user_specified">User specified</option>
     </param>
+    <when value="GRCh38">
+      <param name="ensembl_organism" type="hidden" value="homo_sapiens"/>
+      <param name="ensembl_prefix" type="hidden" value="Homo_sapiens"/>
+      <param name="ensembl_genome_version" type="hidden" value="GRCh38"/>
+      <param name="ensembl_version" type="hidden" value="80"/>
+      <param name="ncbi_organism" type="hidden" value="Homo_sapiens"/>
+      <param name="ncbi_prefix" type="hidden" value="Hs"/>
+      <param name="ucsc_genome_version" type="hidden" value="hg38"/>
+      <param name="chromosomes" type="hidden" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y,MT"/>
+      <param name="mt_chromosome" type="hidden" value="MT"/>
+      <param name="gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,processed_transcript,protein_coding"/>
+      <param name="ig_gene_sources" type="hidden" value="IG_C_gene,IG_D_gene,IG_J_gene,IG_V_gene,IG_pseudogene"/>
+      <param name="rrna_gene_sources" type="hidden" value="Mt_rRNA,rRNA,rRNA_pseudogene"/>
+    </when>
     <when value="GRCh37">
       <param name="ensembl_organism" type="hidden" value="homo_sapiens"/>
       <param name="ensembl_prefix" type="hidden" value="Homo_sapiens"/>
@@ -221,6 +236,9 @@
 gene_sources = $genome.gene_sources
 ig_gene_sources = $genome.ig_gene_sources
 rrna_gene_sources = $genome.rrna_gene_sources
+gene_biotypes = $genome.gene_sources
+ig_gene_biotypes = $genome.ig_gene_sources
+rrna_gene_biotypes = $genome.rrna_gene_sources
 
 #raw
 # Remove temp files
@@ -228,7 +246,7 @@
 remove_job_temp_files                       = yes
 #end raw
   </configfile>
-  <configfile name="defuse_script">
+  <configfile name="defuse_script">#slurp
 #!/bin/bash
 ## define some things for cheetah processing
 #set $amp = chr(38)
--- a/defuse.xml	Wed May 06 14:31:57 2015 -0500
+++ b/defuse.xml	Fri Dec 18 14:37:33 2015 -0600
@@ -79,7 +79,7 @@
   <param name="keep_output" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Save DeFuse working directory files" 
          help="The defuse output working directory can be helpful for determining errors that may have occurred during the run, 
                but they require considerable diskspace, and should be deleted and purged when no longer needed."/>
-  <param name="do_get_reads" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Run get_reads on each cluster"/>
+  <param name="do_get_reads" type="boolean" checked="false" truevalue="yes" falsevalue="no" label="Run get_reads on each cluster"/>
  </inputs>
  <stdio>
    <exit_code range="1:"  level="fatal" description="Error Running Defuse" />
@@ -570,8 +570,10 @@
 cp $defuse_config $config_txt
 ## make a data_dir  and ln -s the input fastq
 mkdir -p data_dir
-ln -s $left_pairendreads data_dir/reads_1.fastq
-ln -s $right_pairendreads data_dir/reads_2.fastq
+## ln -s "$left_pairendreads" data_dir/reads_1.fastq
+## ln -s "$right_pairendreads" data_dir/reads_2.fastq
+cp "$left_pairendreads" data_dir/reads_1.fastq
+cp "$right_pairendreads" data_dir/reads_2.fastq
 ## ln to output_dir in from_work_dir
 #if $defuse_out.__str__ != 'None':
 mkdir -p $defuse_out.dataset.extra_files_path