Mercurial > repos > elixir-it > covacs_select_filtration
changeset 0:6d8aa1176a94 draft
Uploaded
author | elixir-it |
---|---|
date | Fri, 09 Nov 2018 06:03:28 -0500 |
parents | |
children | 3a37867409fe |
files | covacs_Select_Filtration.xml mv_untar_gatk.sh tool-data/covacs_gatk_indexes.loc.sample tool_data_table_conf.xml.sample |
diffstat | 4 files changed, 132 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/covacs_Select_Filtration.xml Fri Nov 09 06:03:28 2018 -0500
@@ -0,0 +1,85 @@
+<!--
+  Galaxy wrapper that runs two GATK 3.8 walkers in sequence on one VCF:
+    1. SelectVariants    - keeps only the variant type chosen in TYPE;
+    2. VariantFiltration - marks records matching the fixed expression with
+                           the filter name filter_LQ_<TYPE>.
+  GenomeAnalysisTK.jar is read from two levels above $CONDA_PREFIX;
+  mv_untar_gatk.sh is called first to unpack it there when it is missing.
+-->
+<tool id="covacs_Select_Filtration" name="covacs_Select_filtration" version="3.8">
+    <description>SelectVariants VariantFiltration wrapper for covacs, use in case of not enough snp or indels error in covacs_VariantRecalibrator</description>
+    <requirements>
+        <requirement type="package" version="3.8">gatk</requirement>
+    </requirements>
+    <!-- stderr of both GATK calls is redirected into the log dataset, so a
+         failed run is detected from the exit code of the command chain. -->
+    <command detect_errors="exit_code">
+        <![CDATA[
+        ## Unpack GenomeAnalysisTK.jar next to the conda prefix if it is not there yet.
+        ## The helper script has a bash shebang, so it is run with bash explicitly.
+        bash '$__tool_directory__/mv_untar_gatk.sh' &&
+
+        ## Expose the input dataset under a .vcf file name for GATK.
+        ln -s '$input1' input1.vcf &&
+
+        ## Step 1: keep only the selected variant type (the log is created here).
+        java -jar "\$CONDA_PREFIX/../../GenomeAnalysisTK.jar"
+            -T SelectVariants
+            -R '$ref_file.fields.path'
+            -V input1.vcf
+            -selectType $TYPE
+            -o variants_recal.indels.vcf
+            2> '$log'
+        &&
+
+        ## Step 2: flag low-quality records (stderr is appended to the same log).
+        java -jar "\$CONDA_PREFIX/../../GenomeAnalysisTK.jar"
+            -T VariantFiltration
+            -R '$ref_file.fields.path'
+            -V variants_recal.indels.vcf
+            --filterExpression "DP<8 || QD < 2.0 || FS > 200.0 || ReadPosRankSum < -20.0"
+            --filterName "filter_LQ_$TYPE"
+            -o variants_recal.filtered.small.panel.region.vcf
+            2>> '$log'
+        ]]>
+    </command>
+    <inputs>
+        <!-- The command always links and reads this dataset, so it is required. -->
+        <param name="input1" type="data" format="vcf" label="input VCF" optional="false" />
+        <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
+            <options from_data_table="covacs_gatk_indexes">
+                <filter type="sort_by" column="2" />
+                <validator type="no_options" message="No indexes are available" />
+            </options>
+            <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+        </param>
+        <!-- -selectType and the filter name both need a value; SNP is the default. -->
+        <param name="TYPE" type="select" label="Variant type to select" optional="false">
+            <option value="INDEL">INDEL</option>
+            <option value="SNP" selected="true">SNP</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data format="vcf" name="recal" from_work_dir="variants_recal.indels.vcf" label="SelectVariants on ${on_string} $TYPE :recal"/>
+        <data format="vcf" name="filtered" from_work_dir="variants_recal.filtered.small.panel.region.vcf" label="VariantFiltration on ${on_string} $TYPE :filtered"/>
+        <data format="txt" name="log" label="log"/>
+    </outputs>
+    <help><![CDATA[
+**IMPORTANT** to get the wrapper ready to start the admin user have to download gatk GATK version 3.8 from the broadinstitute site https://software.broadinstitute.org/gatk/download/archive and then move it in the conda_prefix folder, the path of the conda_prefix is written in the galaxy.ini(or .yml) file
+
+**more informations** at https://software.broadinstitute.org/gatk/documentation/tooldocs/3.8-0/org_broadinstitute_gatk_tools_walkers_variantutils_SelectVariants.php , https://software.broadinstitute.org/gatk/documentation/tooldocs/3.8-0/org_broadinstitute_gatk_tools_walkers_filters_VariantFiltration.php
+
+**Implemented options** SelectVariants & VariantFiltration
+
+-R Reference sequence file
+
+-V vcf input
+
+other options are fixed based on covacs pipeline
+
+**description** this step can be used if covacs_indel_snp have not enough data to create the model, it calls SelectVariants and VariantFiltration applying the filter expression --filterExpression "DP<8 || QD < 2.0 || FS > 200.0 || ReadPosRankSum < -20.0" to filter the variants both snp and indels
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1186/s12864-018-4508-1</citation>
+    </citations>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mv_untar_gatk.sh Fri Nov 09 06:03:28 2018 -0500 @@ -0,0 +1,9 @@ +#!/bin/bash +#if the .jar file is not present in the conda_prefix the script search the tar.gz in the conda_prefix of the vm +#and untar the archive +if [[ ! -f $CONDA_PREFIX/../../GenomeAnalysisTK.jar ]] ; then + tar -zxvf $CONDA_PREFIX/../../GenomeAnalysis*.tar.gz -C $CONDA_PREFIX/../../ + +else + echo GATK is present +fi
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/covacs_gatk_indexes.loc.sample Fri Nov 09 06:03:28 2018 -0500
@@ -0,0 +1,36 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of references shared by all covacs wrappers that need a GATK reference. You will need
+#to create these data files and then create a covacs_gatk_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The covacs_gatk_indexes.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_path>
+#
+#So, for example, if you had the phiX reference indexed and stored in
+#/depot/data2/galaxy/phiX/base/,
+#then the covacs_gatk_indexes.loc entry would look like this:
+#
+#phiX174	phiX	phiX Pretty	/depot/data2/galaxy/phiX/base/phiX.fa
+#
+#and your /depot/data2/galaxy/phiX/base/ directory
+#would contain the phiX.fa, phiX.dict and phiX.fa.fai files.
+#
+#
+#Your covacs_gatk_indexes.loc file should include an entry per line for each
+#reference you have stored. The path is passed to GATK as the reference (-R), so it
+#must be the FASTA file itself, with its .dict and .fai files next to it. For example:
+#
+#phiX174	phiX	phiX174	/depot/data2/galaxy/phiX/base/phiX.fa
+#hg18canon	hg18	hg18 Canonical	/depot/data2/galaxy/hg18/base/hg18canon.fa
+#hg18full	hg18	hg18 Full	/depot/data2/galaxy/hg18/base/hg18full.fa
+#/orig/path/hg19.fa	hg19	hg19	/depot/data2/galaxy/hg19/base/hg19.fa
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
+hg38	hg38	hg38_GDC	/export/gatkhg38pl/GRCh38.d1.vd1.fa
+hg19	hg19	hg19	/export/gatk_hg19_index_bundle/ucsc.hg19.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Fri Nov 09 06:03:28 2018 -0500
@@ -0,0 +1,12 @@
+<tables>
+    <!-- BED file locations for covacs -->
+    <table name="covacs_bed" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/covacs_bed.loc" />
+    </table>
+    <!-- Index file locations for the covacs GATK wrapper -->
+    <table name="covacs_gatk_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/covacs_gatk_indexes.loc" />
+    </table>
+</tables>