changeset 0:03593410f057 draft

Uploaded
author elixir-it
date Fri, 09 Nov 2018 05:25:43 -0500
parents
children cbe203c9bc3a
files bed_macros.xml covacs_freebayes.xml covacs_macros.xml filter.fb.pl tool-data/covacs_bed.loc.sample tool-data/covacs_gatk_indexes.loc.sample tool_data_table_conf.xml.sample
diffstat 7 files changed, 221 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bed_macros.xml	Fri Nov 09 05:25:43 2018 -0500
@@ -0,0 +1,22 @@
+<macros>
+  <macro name="bed_loc"> <!-- BED selector: a built-in entry from the covacs_bed data table, or a dataset from the history -->
+    <conditional name="bed_source">
+      <param name="bed_source_selector" type="select" label="Will you select a bed file from your history or use a built-in bed?">
+        <option value="cached">Use a built-in bed</option>
+        <option value="history">Use a bed from history as reference</option>
+      </param>
+      <when value="cached">
+        <param name="bed_cached" type="select" label="Using reference bed" help="Select bed from the list">
+          <options from_data_table="covacs_bed">
+            <filter type="sort_by" column="2" />
+            <validator type="no_options" message="No bed are available" />
+          </options>
+          <validator type="no_options" message="A built-in bed file is not available"/>
+        </param>
+      </when>
+      <when value="history"> <!-- the history BED is optional, so it may be left unset -->
+        <param name="bed_history" type="data" format="bed" label="Use the following dataset as reference bed " help="You can upload a bed file to the history and use it" optional="true" />
+      </when>
+    </conditional>
+  </macro>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/covacs_freebayes.xml	Fri Nov 09 05:25:43 2018 -0500
@@ -0,0 +1,91 @@
+ <tool id="covacs_frebayes" name="covacs_freebayes" version="1.2.0">
+  <description>Bayesian genetic variant detection, freebayes V = 1.2.0 </description>
+  <macros> <!-- shared parameter blocks: reference_loc and bed_loc -->
+    <import>covacs_macros.xml</import>
+    <import>bed_macros.xml</import>
+  </macros>
+  <requirements>
+    <requirement type="package" version="1.2.0">freebayes</requirement>
+  </requirements>
+  <command>
+    <![CDATA[
+        ## Run freebayes on the input BAM, then derive the filtered VCF with
+        ## filter.fb.pl. All substituted paths are single-quoted so that
+        ## locations containing spaces or shell metacharacters stay intact.
+        freebayes
+
+        ## Reference FASTA: from the covacs_gatk_indexes data table or from the history
+        #if $reference_source.reference_source_selector == "cached"
+            -f '$reference_source.ref_file.fields.path'
+        #end if
+        #if $reference_source.reference_source_selector == "history"
+            -f '$reference_source.ref_file_h'
+        #end if
+
+        ## Optional minimum alternate-allele observation count
+        #if $input2
+            -C $input2
+        #end if
+
+        ## Optional target regions: BED from the history (if one was chosen) or from the covacs_bed data table
+        #if $bed_source.bed_source_selector == "history" and $bed_source.bed_history
+            -t '$bed_source.bed_history'
+        #end if
+        #if $bed_source.bed_source_selector == "cached"
+            -t '$bed_source.bed_cached.fields.path'
+        #end if
+
+        '$input1'
+        > '$output'
+
+        ## Second output: filtered subset of the calls written above
+        && perl '$__tool_directory__/filter.fb.pl' '$output' '$output_filtered'
+
+    ]]>
+  </command>
+  <inputs> <!-- reference_loc and bed_loc are expanded from the imported macro files -->
+    <expand macro="reference_loc"/>
+    <param format="bam" name="input1" label="bam file" type="data" /> <!-- required: the command always passes this BAM to freebayes -->
+    <expand macro="bed_loc"/>
+    <param name="input2" label="-C min-alternate-count" type="integer" value="5" optional="true"/>
+  </inputs>
+  <outputs> <!-- output: stdout of freebayes; output_filtered: written by filter.fb.pl -->
+    <data name="output" format="vcf" label="${tool.name} on ${on_string}:vcf"/>
+    <data name="output_filtered" format="vcf" label="freebayes filtered on ${on_string}"/>
+  </outputs>
+  <help>
+**Currently available options**
+
+-f --fasta-reference FILE
+                   Use FILE as the reference sequence for analysis.
+                   An index file (FILE.fai) will be created if none exists.
+                   If neither --targets nor --region are specified, FreeBayes
+                   will analyze every position in this reference.
+
+-C --min-alternate-count N
+                   Require at least this count of observations supporting
+                   an alternate allele within a single individual in order
+                   to evaluate the position.
+-t --targets FILE
+                   Limit analysis to targets listed in the BED-format FILE.
+
+**Two output files are generated**
+
+The first output is a VCF file containing all the variants detected by freebayes. The second output contains the subset of those variants with a QUAL score greater than 20 whose genotype is not homozygous reference (0/0); see the CoVaCS paper for more details.
+
+
+  </help>
+  <citations> <!-- CoVaCS paper (DOI) and the freebayes arXiv preprint (BibTeX) -->
+    <citation type="doi">10.1186/s12864-018-4508-1</citation>
+    <citation type="bibtex">
+                @misc{1207.3907,
+                    Author = {Erik Garrison},
+                    Title = {Haplotype-based variant detection from short-read sequencing},
+                    Year = {2012},
+                    Eprint = {arXiv:1207.3907},
+                    url = {http://arxiv.org/abs/1207.3907}
+                }
+    </citation>
+  </citations>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/covacs_macros.xml	Fri Nov 09 05:25:43 2018 -0500
@@ -0,0 +1,22 @@
+<macros>
+  <macro name="reference_loc"> <!-- reference selector: a built-in entry from the covacs_gatk_indexes data table, or a FASTA dataset from the history -->
+    <conditional name="reference_source">
+      <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in fasta?">
+        <option value="cached">Use a built-in genome index</option>
+        <option value="history">Use a genome from history and build index</option>
+      </param>
+      <when value="cached">
+        <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
+          <options from_data_table="covacs_gatk_indexes">
+            <filter type="sort_by" column="2" />
+            <validator type="no_options" message="No genomes are available" />
+          </options>
+          <validator type="no_options" message="A reference genome is not available for the build associated with the selected input file"/>
+        </param>
+      </when>
+      <when value="history"> <!-- unlike the built-in branch, this takes a FASTA dataset directly -->
+        <param name="ref_file_h" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" />
+      </when>
+    </conditional>
+  </macro>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filter.fb.pl	Fri Nov 09 05:25:43 2018 -0500
@@ -0,0 +1,21 @@
+#!/usr/bin/perl -w
+# Filter a freebayes VCF: keep header lines plus records with QUAL > 20
+# whose genotype (first ':' sub-field of the last column) is not "0/0".
+# Usage: filter.fb.pl <input.vcf> <output.vcf>
+use strict;
+my ($f, $out) = @ARGV;
+die "Usage: $0 <input.vcf> <output.vcf>\n" unless defined $f && defined $out;
+open(my $in, '<', $f) or die "Cannot open input '$f': $!\n";
+open(my $fh, '>', $out) or die "Cannot open output '$out': $!\n";
+while (my $line = <$in>)
+{
+	# Header and meta lines are copied through unchanged.
+	if ($line =~ /^#/) { print $fh $line; next; }
+	my @cols = split(' ', $line);  # whitespace split, as the original split() did
+	next if @cols < 6;             # blank or truncated record: nothing to evaluate
+	my $qual = $cols[5];           # sixth column holds the QUAL score
+	my $gt = (split(/:/, $cols[-1]))[0];
+	next if $gt eq "0/0";
+	print $fh $line if $qual ne '.' && $qual > 20;  # '.' (missing QUAL) never passes
+}
+close($in); close($fh) or die "Cannot close output '$out': $!\n";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/covacs_bed.loc.sample	Fri Nov 09 05:25:43 2018 -0500
@@ -0,0 +1,17 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use built-in BED files with the covacs wrappers. You will need
+#to create these data files and then create a covacs_bed.loc file
+#similar to this one (store it in this directory) that points to
+#the paths at which those files are stored. The covacs_bed.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_id>   <dbkey>   <display_name>   <file_path>
+#
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
+hg19	hg19	hg19-padded	/export/BED/S07084713_Padded.bed
+hgbed	hg19	hg19-bed-test	/export/BED/chr22.bed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/covacs_gatk_indexes.loc.sample	Fri Nov 09 05:25:43 2018 -0500
@@ -0,0 +1,36 @@
+#This is a sample file distributed with Galaxy that enables all covacs
+#wrappers that need a GATK reference to use built-in genomes. You will need
+#to create these data files and then create a covacs_gatk_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The covacs_gatk_indexes.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_path>
+#
+#So, for example, if you had the phiX reference stored in
+#/depot/data2/galaxy/phiX/base/,
+#then the covacs_gatk_indexes.loc entry would look like this:
+#
+#phiX174   phiX   phiX Pretty   /depot/data2/galaxy/phiX/base/phiX.fa
+#
+#and your /depot/data2/galaxy/phiX/base/ directory
+#would contain phiX.dict, phiX.fa.fai files.
+#
+#
+#Your covacs_gatk_indexes.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files.  For example:
+#
+#phiX174                                phiX    phiX174                 /depot/data2/galaxy/phiX/base/phiX.fa
+#hg18canon                              hg18    hg18 Canonical  /depot/data2/galaxy/hg18/base/hg18canon.fa
+#hg18full                               hg18    hg18 Full               /depot/data2/galaxy/hg18/base/hg18full.fa
+#/orig/path/hg19.fa             hg19    hg19                    /depot/data2/galaxy/hg19/base/hg19.fa
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
+hg38	hg38	hg38_GDC	/export/gatkhg38pl/GRCh38.d1.vd1.fa
+hg19	hg19	hg19	/export/gatk_hg19_index_bundle/ucsc.hg19.fasta
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Fri Nov 09 05:25:43 2018 -0500
@@ -0,0 +1,12 @@
+<tables>
+    <!-- BED files offered by the bed_loc macro (covacs_bed data table) -->
+    <table name="covacs_bed" comment_char="#">
+        <columns> value, dbkey, name, path</columns>
+        <file path="tool-data/covacs_bed.loc" />
+    </table>
+    <!-- Reference genomes offered by the reference_loc macro (covacs_gatk_indexes data table) -->
+    <table name="covacs_gatk_indexes" comment_char="#">
+        <columns> value, dbkey, name, path</columns>
+        <file path="tool-data/covacs_gatk_indexes.loc" />
+    </table>
+</tables>