changeset 0:75d5828ff343 draft

Uploaded
author elixir-it
date Fri, 09 Nov 2018 06:02:18 -0500
parents
children 210eb83f4188
files bed_macros.xml covacs_HaplotypeCaller.xml mv_untar_gatk.sh tool-data/covacs_bed.loc.sample tool-data/covacs_gatk_indexes.loc.sample tool_data_table_conf.xml.sample
diffstat 6 files changed, 229 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bed_macros.xml	Fri Nov 09 06:02:18 2018 -0500
@@ -0,0 +1,22 @@
+<macros>
+    <!-- Shared input block: pick a BED file either from the covacs_bed data table ("cached") or from the history. -->
+    <macro name="bed_loc">
+        <conditional name="bed_source">
+            <param name="bed_source_selector" type="select" label="Will you select a bed file from your history or use a built-in bed?">
+                <option value="cached">Use a built-in bed</option>
+                <option value="history">Use a bed from history as reference</option>
+            </param>
+            <!-- Built-in BED: entries come from the covacs_bed data table, sorted on column 2. -->
+            <when value="cached">
+                <param name="bed_cached" type="select" label="Using reference bed" help="Select bed from the list">
+                    <options from_data_table="covacs_bed">
+                        <filter type="sort_by" column="2" />
+                        <validator type="no_options" message="No bed are available" />
+                    </options>
+                    <validator type="no_options" message="A built-in bed file is not available"/>
+                </param>
+            </when>
+            <!-- History BED: optional dataset, so the tool can also run without any interval file. -->
+            <when value="history">
+                <param name="bed_history" type="data" format="bed" label="Use the following dataset as reference bed " help="You can upload a bed file to the history and use it" optional="true" />
+            </when>
+        </conditional>
+    </macro>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/covacs_HaplotypeCaller.xml	Fri Nov 09 06:02:18 2018 -0500
@@ -0,0 +1,133 @@
+ <tool id="covacs_HaplotypeCaller" name="covacs_HaplotypeCaller" version="3.8">
+  <description>GATK HaplotypeCaller wrapper Version 3.8</description>
+  <macros>
+	<import>bed_macros.xml</import>
+  </macros>
+  <requirements>
+	<requirement type="package" version="1.7" >samtools</requirement>
+  	<requirement type="package" version="3.8" >gatk</requirement>
+  </requirements>
+  <command>
+    <![CDATA[
+	## Unpack the GATK archive if GenomeAnalysisTK.jar is not present yet.
+	## The helper has a bash shebang and uses bash syntax, so start it with
+	## bash explicitly ("sh script" ignores the shebang).
+	bash '$__tool_directory__/mv_untar_gatk.sh' &&
+
+	## Link the optional dbSNP dataset under a .vcf name for GATK.
+	#if $dbsnp
+	ln -s '$dbsnp' dbsnp.vcf &&
+	#end if
+
+	## Link the BED dataset chosen from the history. The selector value is
+	## "history" with no trailing space; this condition has to match the one
+	## further down that adds "-L region.bed" to the GATK call.
+	#if $bed_source.bed_source_selector == "history" and $bed_source.bed_history
+	ln -s '$bed_source.bed_history' region.bed &&
+	#end if
+
+	## Link the BAM into the job directory and index the link, so that
+	## input1.bam.bai is written to the working directory instead of beside
+	## the input dataset.
+	ln -s '$input1' input1.bam &&
+	samtools index -@ \${GALAXY_SLOTS:-4} input1.bam &&
+
+	## GATK tool call
+	java -jar "\$CONDA_PREFIX/../../GenomeAnalysisTK.jar" -T HaplotypeCaller
+
+	## Restrict calling to the BED file linked from the history ...
+	#if $bed_source.bed_source_selector == "history" and $bed_source.bed_history
+	-L region.bed
+	#end if
+
+	## ... or to a built-in BED file from the covacs_bed data table.
+	#if $bed_source.bed_source_selector == "cached"
+	-L '$bed_source.bed_cached.fields.path'
+	#end if
+
+	## Reference genome selected from the covacs_gatk_indexes data table.
+	-R '$ref_file.fields.path'
+
+	## BAM input (the symlink created above).
+	-I input1.bam
+
+	## Test the string form so that a threshold of 0 is still passed on;
+	## only an empty field omits the option.
+	#if str($standCall) != ''
+	-stand_call_conf $standCall
+	#end if
+
+	## Adding the dbSNP file
+	#if $dbsnp
+	--dbsnp dbsnp.vcf
+	#end if
+
+	## GVCF output
+	#if str($mode) == "GVCF"
+	--emitRefConfidence GVCF
+	-variant_index_type LINEAR -variant_index_parameter 128000
+	#end if
+
+	## Outputs: calls go to the "output" dataset, GATK's stderr to "log".
+	-o '$output' 2> '$log'
+
+	]]>
+  </command>
+  <inputs>
+    <!-- Reference genome, chosen from the covacs_gatk_indexes data table (sorted on column 2). -->
+    <param name="ref_file" type="select" label="Using indexed reference genome" help="Select genome index from the list">
+      <options from_data_table="covacs_gatk_indexes">
+        <filter type="sort_by" column="2" />
+        <validator type="no_options" message="No indexes are available" />
+      </options>
+      <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+    </param>
+    <!-- The BAM is mandatory: the command always indexes it and passes it to GATK with -I,
+         so it must not be declared optional. -->
+    <param name="input1" type="data" format="bam" label="bam file" />
+    <!-- Interval (BED) selection, expanded from bed_macros.xml. -->
+    <expand macro="bed_loc"/>
+    <!-- Optional inputs: each one only adds its command-line option when it is set. -->
+    <param name="dbsnp" type="data" format="vcf" optional="true" label="input dbsnp file" help="rsIDs from this file are used to populate the ID column of the output"/>
+    <param name="standCall" type="integer" value="30" optional="true" label="-stand_call_conf" help="The minimum phred-scaled confidence threshold at which  variants should be called"/>
+    <!-- "GVCF" adds the emitRefConfidence / variant_index options to the GATK call; "VCF" adds nothing. -->
+    <param name="mode" type="select" label="choose output format" >
+      <option value="GVCF">GVCF_output</option>
+      <option value="VCF">VCF_output</option>
+    </param>
+  </inputs>
+  <outputs>
+    <!-- Variant calls written by GATK through its -o option. -->
+    <data name="output" format="vcf" label="${tool.name} on ${on_string}:vcf or GVCF"/>
+    <!-- Receives the standard error stream of the GATK call (the command's "2>" redirection). -->
+    <data name="log" format="txt" label="${tool.name} on ${on_string}:log"/>
+  </outputs>
+  <help> 
+.. class:: warningmark
+
+**IMPORTANT**: before the wrapper can be used, the admin user has to download GATK version 3.8 as a tar.gz archive from the Broad Institute site https://software.broadinstitute.org/gatk/download/archive and move it into the conda_prefix folder; the path of the conda_prefix is set in the galaxy.ini (or galaxy.yml) file.
+
+		**More information** at https://software.broadinstitute.org/gatk/documentation/tooldocs/3.8-0/org_broadinstitute_gatk_tools_walkers_haplotypecaller_HaplotypeCaller.php
+
+-----
+
+**Implemented options** HaplotypeCaller:
+
+**-L**		: One or more genomic intervals over which to operate(file.bed)
+
+**-stand\_call\_conf**		: The minimum phred-scaled confidence threshold at which  variants should be called
+
+**-R**		: Reference sequence file
+
+**--dbsnp**		: dbSNP file. rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate. dbSNP is not used in any way for the calculations themselves.
+
+-----
+
+**When the output is in GVCF format, the following fixed options are applied:**
+
+**--emitRefConfidence** (GVCF|VCF) : Mode for emitting reference confidence scores
+
+**-variant_index_type** LINEAR : Type of IndexCreator to use for VCF indices
+
+**-variant_index_parameter** 128000 :  Parameter to pass to the VCF/BCF IndexCreator
+
+
+  </help>
+  <citations>
+        <citation type="doi">10.1186/s12864-018-4508-1</citation>
+  </citations>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mv_untar_gatk.sh	Fri Nov 09 06:02:18 2018 -0500
@@ -0,0 +1,9 @@
+#!/bin/bash
+# Unpack GATK on first use: when GenomeAnalysisTK.jar is missing from the
+# directory two levels above $CONDA_PREFIX, extract the GenomeAnalysis*.tar.gz
+# archive found in that same directory into it.
+# The POSIX "[" test is used instead of "[[" so the script behaves the same
+# when it is started as "sh mv_untar_gatk.sh" on systems where sh is not bash.
+gatk_dir="$CONDA_PREFIX/../.."
+if [ ! -f "$gatk_dir/GenomeAnalysisTK.jar" ]; then
+	# Quote the directory (it may contain spaces) but leave the glob unquoted.
+	tar -zxvf "$gatk_dir"/GenomeAnalysis*.tar.gz -C "$gatk_dir"
+else
+	echo GATK is present
+fi
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/covacs_bed.loc.sample	Fri Nov 09 06:02:18 2018 -0500
@@ -0,0 +1,17 @@
+#This is a sample file distributed with Galaxy that enables the covacs tools
+#to offer built-in BED files. You will need
+#to create these BED files and then create a covacs_bed.loc file
+#similar to this one (store it in this directory) that points to
+#the location of each BED file. The covacs_bed.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_id>   <dbkey>   <display_name>   <file_path>
+#
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
+hg19	hg19	hg19-padded	/export/BED/S07084713_Padded.bed
+hgbed	hg19	hg19-bed-test	/export/BED/chr22.bed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/covacs_gatk_indexes.loc.sample	Fri Nov 09 06:02:18 2018 -0500
@@ -0,0 +1,36 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to provide reference genomes to all covacs wrappers that need a GATK reference. You will need
+#to create these data files and then create a covacs_gatk_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The covacs_gatk_indexes.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_path>
+#
+#So, for example, if you had the phiX reference stored in
+#/depot/data2/galaxy/phiX/base/,
+#then the covacs_gatk_indexes.loc entry would look like this:
+#
+#phiX174   phiX   phiX Pretty   /depot/data2/galaxy/phiX/base/phiX.fa
+#
+#and your /depot/data2/galaxy/phiX/base/ directory
+#would contain phiX.dict, phiX.fa.fai files.
+#
+#
+#Your covacs_gatk_indexes.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files.  For example:
+#
+#phiX174                                phiX    phiX174                 /depot/data2/galaxy/phiX/base/phiX.fa
+#hg18canon                              hg18    hg18 Canonical  /depot/data2/galaxy/hg18/base/hg18canon.fa
+#hg18full                               hg18    hg18 Full               /depot/data2/galaxy/hg18/base/hg18full.fa
+#/orig/path/hg19.fa             hg19    hg19                    /depot/data2/galaxy/hg19/base/hg19.fa
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
+hg38	hg38	hg38_GDC	/export/gatkhg38pl/GRCh38.d1.vd1.fa
+hg19	hg19	hg19	/export/gatk_hg19_index_bundle/ucsc.hg19.fasta
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Fri Nov 09 06:02:18 2018 -0500
@@ -0,0 +1,12 @@
+<tables>
+    <!-- covacs_bed: BED files listed in tool-data/covacs_bed.loc -->
+    <table name="covacs_bed" comment_char="#">
+        <columns> value, dbkey, name, path</columns>
+        <file path="tool-data/covacs_bed.loc" />
+    </table>
+    <!-- covacs_gatk_indexes: reference genomes for the covacs GATK wrapper, listed in tool-data/covacs_gatk_indexes.loc -->
+    <table name="covacs_gatk_indexes" comment_char="#">
+        <columns> value, dbkey, name, path</columns>
+        <file path="tool-data/covacs_gatk_indexes.loc" />
+    </table>
+</tables>