changeset 0:eaac11a49bd2 draft

Uploaded
author elixir-it
date Fri, 09 Nov 2018 06:06:32 -0500
parents
children 44e9fd8fd25a
files bed_macros.xml covacs_varscan2.xml filter.varscan.pl tool-data/covacs_bed.loc.sample tool-data/covacs_gatk_indexes.loc.sample tool_data_table_conf.xml.sample
diffstat 6 files changed, 276 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bed_macros.xml	Fri Nov 09 06:06:32 2018 -0500
@@ -0,0 +1,22 @@
+<macros>
+    <!-- bed_loc: lets the user pick a BED file either from the "covacs_bed"
+         data table (built-in) or from a dataset in the history. -->
+    <macro name="bed_loc">
+        <conditional name="bed_source">
+            <param name="bed_source_selector" type="select" label="Will you select a bed file from your history or use a built-in bed?">
+                <option value="cached">Use a built-in bed</option>
+                <option value="history">Use a bed from history as reference</option>
+            </param>
+            <!-- Built-in branch: choices come from the covacs_bed data table, sorted on column 2. -->
+            <when value="cached">
+                <param name="bed_cached" type="select" label="Using reference bed" help="Select bed from the list">
+                    <options from_data_table="covacs_bed">
+                        <filter type="sort_by" column="2"/>
+                        <validator type="no_options" message="No bed are available"/>
+                    </options>
+                    <validator type="no_options" message="A built-in bed file is not available"/>
+                </param>
+            </when>
+            <!-- History branch: the dataset is declared optional, so it may be left unset. -->
+            <when value="history">
+                <param name="bed_history" type="data" format="bed" label="Use the following dataset as reference bed " help="You can upload a bed file to the history and use it" optional="true"/>
+            </when>
+        </conditional>
+    </macro>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/covacs_varscan2.xml	Fri Nov 09 06:06:32 2018 -0500
@@ -0,0 +1,167 @@
+<tool id="covacs_varsca2" name="covacs_varscan2" version="2.4.2">
+    <description>varscan2 wrapper for covacs</description>
+    <!-- NOTE(review): the id on the enclosing <tool> tag reads "covacs_varsca2"
+         (no "n"). It looks like a typo, but renaming it would change the tool's
+         identity, so confirm before touching it. -->
+    <requirements>
+        <requirement type="package" version="2.4.2">varscan</requirement>
+    </requirements>
+    <macros>
+        <!-- Provides the bed_loc macro that is expanded inside <inputs>. -->
+        <import>bed_macros.xml</import>
+    </macros>
+    <stdio>
+        <!-- "Exception" anywhere on stdout/stderr is fatal; all other output is only logged. -->
+        <regex match="Exception" source="both" level="fatal" description="Tool exception"/>
+        <regex match=".*" source="both" level="log" description="tool progress"/>
+    </stdio>
+
+    <!-- The first line printed by a bare "varscan" call is used as the version string. -->
+    <version_command><![CDATA[varscan 2>&1 | head -n 1]]></version_command>
+
+    <command><![CDATA[
+        ## Write the comma-separated sample names to samples_list.txt, one name per line.
+        ## The value is single-quoted so the shell treats it as plain text.
+        #if $sample_names.strip() != '':
+            echo '$sample_names' | awk -F ',' '{ for (i = 1; i <= NF; i++) { print \$i; } }' > samples_list.txt &&
+        #end if
+
+        ## VarScan sub-command, pileup input and calling thresholds.
+        varscan ${cmd} '${input}'
+        --min-coverage ${min_coverage}
+        --min-reads2 ${min_supporting_reads}
+        --min-avg-qual ${min_avg_qual}
+        --min-var-freq ${min_var_freq}
+        --min-freq-for-hom ${min_freq_for_hom}
+        --p-value ${p_value}
+        #if str($strand_filter) == 'yes':
+            --strand-filter 1
+        #end if
+
+        ## Report only variants in consensus.
+        #if str($cmd) == 'mpileup2cns':
+            --variants
+        #end if
+
+        ## Optional BED restriction, taken from the history or from the covacs_bed data table.
+        ## NOTE(review): "limit" followed by a path is emitted as bare words, not as a
+        ## double-dash option; VarScan appears to document "limit" as a separate
+        ## sub-command, so confirm that this really restricts the calls.
+        #if $bed_source.bed_source_selector == "history" and $bed_source.bed_history
+            limit '$bed_source.bed_history'
+        #end if
+        #if $bed_source.bed_source_selector == "cached"
+            limit '$bed_source.bed_cached.fields.path'
+        #end if
+
+        ## Output format and optional sample naming.
+        --output-vcf 1
+        #if $sample_names.strip() != '':
+            --vcf-sample-list samples_list.txt
+        #end if
+
+        ## The redirect comes last so that every option above is visibly part of the VarScan call.
+        > '$output'
+
+        ## Second output: the raw VCF passed through the bundled filter script.
+        && perl '$__tool_directory__/filter.varscan.pl' '$output' '$output_filtered'
+    ]]></command>
+
+    <inputs>
+        <param format="pileup" name="input" type="data" label="Pileup dataset" help=""/>
+        <!-- The selected value is used verbatim as the VarScan sub-command. -->
+        <param name="cmd" type="select" label="Analysis type">
+            <option value="mpileup2snp" selected="True">single nucleotide variation</option>
+            <option value="mpileup2indel">insertions and deletions</option>
+            <option value="mpileup2cns">consensus genotype</option>
+        </param>
+        <!-- BED source chooser (conditional "bed_source") imported from bed_macros.xml. -->
+        <expand macro="bed_loc"/>
+        <!-- Calling thresholds; each one maps to the like-named VarScan option in the command template. -->
+        <param name="min_coverage" type="integer" value="8" min="1" max="200" label="Minimum read depth" help="Minimum depth at a position to make a call"/>
+        <param name="min_supporting_reads" type="integer" value="2" min="1" max="200" label="Minimum supporting reads" help="Minimum supporting reads at a position to make a call"/>
+        <param name="min_avg_qual" type="integer" value="15" min="1" max="50" label="Minimum base quality at a position to count a read"/>
+        <param name="min_var_freq" type="float" value="0.01" min="0" max="1" label="Minimum variant allele frequency threshold"/>
+        <param name="min_freq_for_hom" type="float" value="0.75" min="0" max="1" label="Minimum frequency to call homozygote"/>
+        <param name="p_value" type="float" value="0.99" min="0" max="1" label="p-value threshold for calling variants"/>
+        <param name="strand_filter" type="select" label="Ignore variants with >90% support on one strand">
+            <option value="no" selected="True">no</option>
+            <option value="yes">yes</option>
+        </param>
+        <!-- Comma-separated free text; the command template splits it into samples_list.txt.
+             A label is supplied so the form field has a proper title. -->
+        <param name="sample_names" type="text" value="" label="Sample names" help="Separate sample names by comma; leave blank to use default sample names."/>
+    </inputs>
+
+    <outputs>
+        <!-- Raw VarScan VCF: receives the stdout redirect in the command template. -->
+        <data name="output" format="vcf"/>
+        <!-- Written by filter.varscan.pl, which is run on the raw VCF. -->
+        <data name="output_filtered" format="vcf" label="varscan2 filtered on ${on_string}"/>
+    </outputs>
+    <tests>
+        <!-- Consensus calling with the default thresholds and no BED restriction. -->
+        <test>
+            <param name="input" value="test_in1.pileup"/>
+            <param name="cmd" value="mpileup2cns"/>
+            <!-- Pick the history branch and leave the optional BED unset: the default
+                 "cached" branch needs a populated covacs_bed data table, which a
+                 clean test environment may not have. -->
+            <conditional name="bed_source">
+                <param name="bed_source_selector" value="history"/>
+            </conditional>
+            <param name="min_coverage" value="8"/>
+            <param name="min_supporting_reads" value="2"/>
+            <param name="min_avg_qual" value="15"/>
+            <param name="min_var_freq" value="0.01"/>
+            <param name="min_freq_for_hom" value="0.75"/>
+            <param name="p_value" value="0.99"/>
+            <param name="strand_filter" value="no"/>
+            <param name="sample_names" value=""/>
+            <output name="output" file="test_out1.vcf" lines_diff="0"/>
+        </test>
+    </tests>
+
+    <help>
+**VarScan Overview**
+
+VarScan_ performs variant detection for massively parallel sequencing data, such as exome, WGS, and transcriptome data. It calls variants from an mpileup dataset and produces a VCF 4.1 file. Full documentation is available online_.
+
+.. _VarScan: http://dkoboldt.github.io/varscan/
+.. _online: http://dkoboldt.github.io/varscan/using-varscan.html
+
+**Input**
+
+::
+
+  mpileup file - The SAMtools mpileup file
+
+**The wrapper produces two outputs**
+
+- the normal output of varscan2
+
+- the output filtered by the filter.varscan.pl script implemented in the covacs pipeline
+
+
+
+**Parameters**
+
+::
+
+  analysis type
+    single nucleotide variation     Identify SNPs from an mpileup file
+    insertions and deletions        Identify indels from an mpileup file
+    consensus genotype              Call consensus and variants from an mpileup file
+
+  min-coverage
+    Minimum read depth at a position to make a call [8]
+
+  min-reads2
+    Minimum supporting reads at a position to call variants [2]
+
+  min-avg-qual
+    Minimum base quality at a position to count a read [15]
+
+  min-var-freq
+    Minimum variant allele frequency threshold [0.01]
+
+  min-freq-for-hom
+    Minimum frequency to call homozygote [0.75]
+
+  p-value
+    Default p-value threshold for calling variants [99e-02]
+
+  strand-filter
+    Ignore variants with >90% support on one strand [1]
+
+  output-vcf
+    If set to 1, outputs in VCF format
+
+  vcf-sample-list
+    For VCF output, a list of sample names in order, one per line
+
+  variants
+    Report only variant (SNP/indel) positions [0]
+    </help>
+
+    <!-- Publications to cite for this tool, both given as DOIs. -->
+    <citations>
+        <citation type="doi">10.1101/gr.129684.111</citation>
+        <citation type="doi">10.1186/s12864-018-4508-1</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filter.varscan.pl	Fri Nov 09 06:06:32 2018 -0500
@@ -0,0 +1,22 @@
+#!/usr/bin/perl -w
+# filter.varscan.pl - keep only sufficiently supported records of a VarScan VCF.
+#
+# Usage: filter.varscan.pl <input.vcf> <output.vcf> [min_value]
+#
+# The first line and every line starting with '#' are copied unchanged.
+# For every other line, the last whitespace-separated column is split on ':'
+# and the record is kept when its third sub-field is numerically >= min_value
+# (default 10, the threshold that used to be hard-coded). Records where that
+# sub-field is missing or not a number are dropped.
+# NOTE(review): in VarScan's per-sample layout the third sub-field is
+# presumably a read depth - confirm against the FORMAT column.
+use strict;
+use warnings;
+use Scalar::Util qw(looks_like_number);
+
+my ($infile, $outfile, $min_value) = @ARGV;
+die "Usage: $0 <input.vcf> <output.vcf> [min_value]\n"
+	unless defined $infile && defined $outfile;
+$min_value = 10 unless defined $min_value;
+
+# Three-argument open on lexical handles; fail loudly instead of silently
+# producing an empty result when a file cannot be opened.
+open(my $in, '<', $infile) or die "Cannot read $infile: $!\n";
+open(my $out, '>', $outfile) or die "Cannot write $outfile: $!\n";
+
+# The first line is always passed through (it is expected to be the VCF
+# header); the guard avoids a warning when the input is empty.
+my $head = <$in>;
+print {$out} $head if defined $head;
+
+while (my $line = <$in>)
+{
+	# Remaining header/comment lines are copied as they are.
+	if ($line =~ /^\#/)
+	{
+		print {$out} $line;
+		next;
+	}
+	# split(' ', ...) is the same awk-style split the bare split() performed.
+	my @columns = split(' ', $line);
+	next unless @columns;	# blank line: nothing to evaluate
+	my @subfields = split(/:/, $columns[-1]);
+	my $value = $subfields[2];
+	# A missing or non-numeric value can never satisfy the threshold.
+	next unless defined $value && looks_like_number($value);
+	print {$out} $line if $value >= $min_value;
+}
+
+close($in);
+close($out) or die "Cannot finish writing $outfile: $!\n";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/covacs_bed.loc.sample	Fri Nov 09 06:06:32 2018 -0500
@@ -0,0 +1,17 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of bed files for covacs sequence data. You will need
+#to create these data files and then create a covacs_bed.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The covacs_bed.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_id>   <dbkey>   <display_name>   <file_path>
+#
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
+hg19	hg19	hg19-padded	/export/BED/S07084713_Padded.bed
+hgbed	hg19	hg19-bed-test	/export/BED/chr22.bed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/covacs_gatk_indexes.loc.sample	Fri Nov 09 06:06:32 2018 -0500
@@ -0,0 +1,36 @@
+#This is a sample file distributed with Galaxy that enables all covacs
+#wrappers that need a GATK reference to locate it. You will need
+#to create these data files and then create a covacs_gatk_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The covacs_gatk_indexes.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_path>
+#
+#So, for example, if you had the phiX reference stored in
+#/depot/data2/galaxy/phiX/base/,
+#then the covacs_gatk_indexes.loc entry would look like this:
+#
+#phiX174   phiX   phiX Pretty   /depot/data2/galaxy/phiX/base/phiX.fa
+#
+#and your /depot/data2/galaxy/phiX/base/ directory
+#would contain phiX.dict, phiX.fa.fai files.
+#
+#
+#Your covacs_gatk_indexes.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files.  For example:
+#
+#phiX174                                phiX    phiX174                 /depot/data2/galaxy/phiX/base/phiX.fa
+#hg18canon                              hg18    hg18 Canonical  /depot/data2/galaxy/hg18/base/hg18canon.fa
+#hg18full                               hg18    hg18 Full               /depot/data2/galaxy/hg18/base/hg18full.fa
+#/orig/path/hg19.fa             hg19    hg19                    /depot/data2/galaxy/hg19/base/hg19.fa
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
+hg38	hg38	hg38_GDC	/export/gatkhg38pl/GRCh38.d1.vd1.fa
+hg19	hg19	hg19	/export/gatk_hg19_index_bundle/ucsc.hg19.fasta
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Fri Nov 09 06:06:32 2018 -0500
@@ -0,0 +1,12 @@
+<tables>
+    <!-- BED files offered by the bed_loc macro (from_data_table="covacs_bed"). -->
+    <table name="covacs_bed" comment_char="#">
+        <columns> value, dbkey, name, path</columns>
+        <file path="tool-data/covacs_bed.loc"/>
+    </table>
+    <!-- Reference index entries for the covacs GATK wrappers. -->
+    <table name="covacs_gatk_indexes" comment_char="#">
+        <columns> value, dbkey, name, path</columns>
+        <file path="tool-data/covacs_gatk_indexes.loc"/>
+    </table>
+</tables>