Mercurial > repos > elixir-it > covacs_varscan2
changeset 0:eaac11a49bd2 draft
Uploaded
author | elixir-it |
---|---|
date | Fri, 09 Nov 2018 06:06:32 -0500 |
parents | |
children | 44e9fd8fd25a |
files | bed_macros.xml covacs_varscan2.xml filter.varscan.pl tool-data/covacs_bed.loc.sample tool-data/covacs_gatk_indexes.loc.sample tool_data_table_conf.xml.sample |
diffstat | 6 files changed, 276 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!-- bed_macros.xml: reusable Galaxy macro for choosing the BED file handed to a covacs tool. -->
<macros>
    <!--
        bed_loc expands to a conditional named "bed_source".
        The selector "bed_source_selector" switches between:
          * cached  : "bed_cached", a select filled from the covacs_bed data table
          * history : "bed_history", an optional BED dataset from the user's history
    -->
    <macro name="bed_loc">
        <conditional name="bed_source">
            <param
                type="select"
                name="bed_source_selector"
                label="Will you select a bed file from your history or use a built-in bed?">
                <option value="cached">Use a built-in bed</option>
                <option value="history">Use a bed from history as reference</option>
            </param>

            <!-- Built-in BED: entries come from the covacs_bed table, sorted on table column 2. -->
            <when value="cached">
                <param
                    type="select"
                    name="bed_cached"
                    label="Using reference bed"
                    help="Select bed from the list">
                    <options from_data_table="covacs_bed">
                        <filter column="2" type="sort_by"/>
                        <validator message="No bed are available" type="no_options"/>
                    </options>
                    <validator message="A built-in bed file is not available" type="no_options"/>
                </param>
            </when>

            <!-- BED from history: the dataset is optional, so it may be left unset. -->
            <when value="history">
                <param
                    type="data"
                    name="bed_history"
                    format="bed"
                    optional="true"
                    label="Use the following dataset as reference bed "
                    help="You can upload a bed file to the history and use it"/>
            </when>
        </conditional>
    </macro>
</macros>
<!-- covacs_varscan2.xml: Galaxy wrapper that runs VarScan on a pileup dataset and then applies filter.varscan.pl. -->
<tool id="covacs_varsca2" name="covacs_varscan2" version="2.4.2">
    <description>varscan2 wrapper for covacs</description>
    <requirements>
        <requirement type="package" version="2.4.2">varscan</requirement>
    </requirements>
    <macros>
        <!-- Provides the "bed_loc" macro expanded in the inputs section. -->
        <import>bed_macros.xml</import>
    </macros>
    <stdio>
        <!-- Once a stdio section is declared only the rules listed here decide job failure,
             so a non-zero exit status of varscan or of the Perl filter must be named explicitly. -->
        <exit_code range="1:" level="fatal" description="Tool exited with a non-zero status"/>
        <regex match="Exception" source="both" level="fatal" description="Tool exception"/>
        <regex match=".*" source="both" level="log" description="tool progress"/>
    </stdio>

    <version_command><![CDATA[
varscan 2>&1 | head -n 1
    ]]></version_command>

    <command><![CDATA[
        ## Set up samples list file.
        ## The comma-separated names are written one per line to samples_list.txt.
        #if $sample_names.strip() != '':
            echo $sample_names | awk -F ',' '{ for (i = 1; i <= NF; i++) { print \$i; } }' > samples_list.txt &&
        #end if

        ## Set up command + input.
        varscan ${cmd} ${input}
        --min-coverage ${min_coverage}
        --min-reads2 ${min_supporting_reads}
        --min-avg-qual ${min_avg_qual}
        --min-var-freq ${min_var_freq}
        --min-freq-for-hom ${min_freq_for_hom}
        --p-value ${p_value}
        #if str($strand_filter) == 'yes':
            --strand-filter 1
        #end if

        ## Report only variants in consensus.
        #if str($cmd) == 'mpileup2cns':
            --variants
        #end if

        ## varscan bed limit added
        ## NOTE(review): "limit <path>" is appended as two bare tokens, not as a dashed option;
        ## confirm that VarScan really restricts calls to the BED regions when invoked this way.

        #if $bed_source.bed_source_selector == "history" and $bed_source.bed_history
            limit $bed_source.bed_history
        #end if
        #if $bed_source.bed_source_selector == "cached"
            limit $bed_source.bed_cached.fields.path
        #end if

        ## Set up outputs.
        ## Tokens written after the redirection are still arguments of the varscan call.
        --output-vcf 1 > $output

        #if $sample_names.strip() != '':
            --vcf-sample-list samples_list.txt
        #end if

        ## Second step: write the filtered copy of the VCF.
        && perl $__tool_directory__/filter.varscan.pl $output $output_filtered
    ]]></command>

    <inputs>
        <param format="pileup" name="input" type="data" label="Pileup dataset" help=""/>
        <param name="cmd" type="select" label="Analysis type">
            <option value="mpileup2snp" selected="True">single nucleotide variation</option>
            <option value="mpileup2indel">insertions and deletions</option>
            <option value="mpileup2cns">consensus genotype</option>
        </param>
        <expand macro="bed_loc"/>
        <param name="min_coverage" type="integer" value="8" min="1" max="200" label="Minimum read depth" help="Minimum depth at a position to make a call"/>
        <param name="min_supporting_reads" type="integer" value="2" min="1" max="200" label="Minimum supporting reads" help="Minimum supporting reads at a position to make a call"/>
        <param name="min_avg_qual" type="integer" value="15" min="1" max="50" label="Minimum base quality at a position to count a read"/>
        <param name="min_var_freq" type="float" value="0.01" min="0" max="1" label="Minimum variant allele frequency threshold"/>
        <param name="min_freq_for_hom" type="float" value="0.75" min="0" max="1" label="Minimum frequency to call homozygote"/>
        <param name="p_value" type="float" value="0.99" min="0" max="1" label="p-value threshold for calling variants"/>
        <param name="strand_filter" type="select" label="Ignore variants with >90% support on one strand">
            <option value="no" selected="True">no</option>
            <option value="yes">yes</option>
        </param>
        <param name="sample_names" type="text" value="" help="Separate sample names by comma; leave blank to use default sample names."/>
    </inputs>

    <outputs>
        <!-- output: VCF written by varscan; output_filtered: the same VCF after filter.varscan.pl. -->
        <data name="output" format="vcf"/>
        <data format="vcf" name="output_filtered" label="varscan2 filtered on ${on_string}" />
    </outputs>
    <tests>
        <test>
            <param name="input" value="test_in1.pileup" />
            <param name="cmd" value="mpileup2cns" />
            <param name="min_coverage" value="8" />
            <param name="min_supporting_reads" value="2" />
            <param name="min_avg_qual" value="15" />
            <param name="min_var_freq" value="0.01" />
            <param name="min_freq_for_hom" value="0.75" />
            <param name="p_value" value="0.99" />
            <param name="strand_filter" value="no" />
            <param name="sample_names" value="" />
            <output name="output" file="test_out1.vcf" lines_diff="0" />
        </test>
    </tests>

    <help>
**VarScan Overview**

VarScan_ performs variant detection for massively parallel sequencing data, such as exome, WGS, and transcriptome data. It calls variants from a mpileup dataset and produces a VCF 4.1 file. Full documentation is available online_.

.. _VarScan: http://dkoboldt.github.io/varscan/
.. _online: http://dkoboldt.github.io/varscan/using-varscan.html

**Input**

::

  mpileup file - The SAMtools mpileup file

**The wrapper produces two outputs**

- the normal output of varscan2

- the output filtered by the filter.varscan.pl script implemented in the covacs pipeline



**Parameters**

::

  analysis type
    single nucleotide detection   Identify SNPs from an mpileup file
    insertions and deletion       Identify indels from an mpileup file
    consensus genotype            Call consensus and variants from an mpileup file

  min-coverage
    Minimum read depth at a position to make a call [8]

  min-reads2
    Minimum supporting reads at a position to call variants [2]

  min-avg-qual
    Minimum base quality at a position to count a read [15]

  min-var-freq
    Minimum variant allele frequency threshold [0.01]

  min-freq-for-hom
    Minimum frequency to call homozygote [0.75]

  p-value
    Default p-value threshold for calling variants [99e-02]

  strand-filter
    Ignore variants with >90% support on one strand [1]

  output-vcf
    If set to 1, outputs in VCF format

  vcf-sample-list
    For VCF output, a list of sample names in order, one per line

  variants
    Report only variant (SNP/indel) positions [0]
    </help>

    <citations>
        <citation type="doi">10.1101/gr.129684.111</citation>
        <citation type="doi">10.1186/s12864-018-4508-1</citation>
    </citations>
</tool>
#!/usr/bin/perl -w
# filter.varscan.pl -- post-filter for the VCF written by the covacs VarScan wrapper.
#
# Usage: filter.varscan.pl <input.vcf> <output.vcf>
#
# Behaviour:
#   * the first line of the input is always copied to the output;
#   * every later line starting with '#' is copied unchanged;
#   * any other line is kept only when the third ':'-separated sub-field of
#     its last whitespace-separated column is numerically >= 10.
#
# NOTE(review): that sub-field is presumably a read-depth value of the last
# sample column -- confirm against the FORMAT definition in the VCF header.
#
# Exits with an error (die) when arguments are missing or a file cannot be
# opened, instead of silently producing an empty output.
use strict;
use warnings;

# Smallest value of the inspected sub-field for which a record is kept.
my $MIN_VALUE = 10;

my ($in_path, $out_path) = @ARGV;
die "Usage: $0 <input.vcf> <output.vcf>\n"
    unless defined $in_path && defined $out_path;

# Three-argument open: the paths are never interpreted as open modes or pipes.
open(my $in,  '<', $in_path)  or die "Cannot open '$in_path' for reading: $!\n";
open(my $out, '>', $out_path) or die "Cannot open '$out_path' for writing: $!\n";

# The first line is passed through whatever it contains; an empty input has none.
my $first_line = <$in>;
print {$out} $first_line if defined $first_line;

while (my $line = <$in>) {
    # Header / meta lines are copied verbatim.
    if ($line =~ /^\#/) {
        print {$out} $line;
        next;
    }

    # Awk-style split: runs of whitespace separate columns, leading blanks are ignored.
    my @columns = split ' ', $line;
    next unless @columns;               # blank line: nothing to evaluate, drop it

    my @subfields = split /:/, $columns[-1];
    my $value     = $subfields[2];
    next unless defined $value;         # fewer than three sub-fields: drop the record

    # Keep Perl's usual string-to-number conversion for odd values (e.g. "."),
    # but without emitting a warning for each of them.
    no warnings 'numeric';
    print {$out} $line if $value >= $MIN_VALUE;
}

close($in);
close($out) or die "Error while closing '$out_path': $!\n";
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/covacs_bed.loc.sample Fri Nov 09 06:06:32 2018 -0500 @@ -0,0 +1,17 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of BED files for the covacs tools. You will need +#to create these data files and then create a covacs_bed.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The covacs_bed.loc +#file has this format (longer white space characters are TAB characters): +# +#<unique_id> <dbkey> <display_name> <file_path> +# +# +#Note that for backwards compatibility with workflows, the unique ID of +#an entry must be the path that was in the original loc file, because that +#is the value stored in the workflow for that parameter. That is why the +#hg19 entry above looks odd. New genomes can be better-looking. +# +hg19 hg19 hg19-padded /export/BED/S07084713_Padded.bed +hgbed hg19 hg19-bed-test /export/BED/chr22.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/covacs_gatk_indexes.loc.sample Fri Nov 09 06:06:32 2018 -0500 @@ -0,0 +1,36 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of reference data for all covacs wrappers that need a GATK reference. You will need +#to create these data files and then create a covacs_gatk_indexes.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The covacs_gatk_indexes.loc +#file has this format (longer white space characters are TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, for example, if you had phiX indexes stored in +#/depot/data2/galaxy/phiX/base/, +#then the covacs_gatk_indexes.loc entry would look like this: +# +#phiX174 phiX phiX Pretty /depot/data2/galaxy/phiX/base/phiX.fa +# +#and your /depot/data2/galaxy/phiX/base/ directory +#would contain phiX.dict, phiX.fa.fai files. +# +# +#Your covacs_gatk_indexes.loc file should include an entry per line for each +#index set you have stored. The "file" in the path does not actually +#exist, but it is the prefix for the actual index files. For example: +# +#phiX174 phiX phiX174 /depot/data2/galaxy/phiX/base/phiX.fa +#hg18canon hg18 hg18 Canonical /depot/data2/galaxy/hg18/base/hg18canon.fa +#hg18full hg18 hg18 Full /depot/data2/galaxy/hg18/base/hg18full.fa +#/orig/path/hg19.fa hg19 hg19 /depot/data2/galaxy/hg19/base/hg19.fa +#...etc... +# +#Note that for backwards compatibility with workflows, the unique ID of +#an entry must be the path that was in the original loc file, because that +#is the value stored in the workflow for that parameter. That is why the +#hg19 entry above looks odd. New genomes can be better-looking. +# +hg38 hg38 hg38_GDC /export/gatkhg38pl/GRCh38.d1.vd1.fa +hg19 hg19 hg19 /export/gatk_hg19_index_bundle/ucsc.hg19.fasta
<!-- tool_data_table_conf.xml.sample: declares the .loc-backed data tables referenced by the covacs wrappers. -->
<tables>
    <!-- BED files offered by the covacs tools; lines starting with '#' in the .loc file are comments. -->
    <table comment_char="#" name="covacs_bed">
        <columns> value, dbkey, name, path</columns>
        <file path="tool-data/covacs_bed.loc"/>
    </table>

    <!-- Reference index entries for the covacs GATK wrapper; same four-column layout. -->
    <table comment_char="#" name="covacs_gatk_indexes">
        <columns> value, dbkey, name, path</columns>
        <file path="tool-data/covacs_gatk_indexes.loc"/>
    </table>
</tables>