view covacs_Select_Filtration.xml @ 2:5058867ac48f draft default tip

Uploaded
author elixir-it
date Wed, 02 Oct 2019 04:31:08 -0400
parents 6d8aa1176a94
children
line wrap: on
line source

 <tool id="covacs_Select_Filtration" name="covacs_Select_filtration" version="3.8">
  <description>SelectVariants and VariantFiltration wrapper for covacs; use it when covacs_VariantRecalibrator fails with a "not enough SNPs or indels" error</description>
  <!-- No macros are defined or imported for this tool. -->
  <macros>
  </macros>
  <!-- Software dependency declared to Galaxy: package "gatk", version 3.8.
       The command section runs GenomeAnalysisTK.jar from a path relative to
       $CONDA_PREFIX; the help text states that an administrator has to
       download GATK 3.8 and place it in the conda_prefix folder. -->
  <requirements>
  	<requirement type="package" version="3.8" >gatk</requirement>
  </requirements>
  <!-- Shell command template (Cheetah). Four steps, each one run only if the
       previous one succeeded:
         1. run mv_untar_gatk.sh from the tool directory (creates the log),
         2. symlink the input dataset to input1.vcf,
         3. GATK SelectVariants keeps only the chosen variant type,
         4. GATK VariantFiltration applies the fixed hard-filter expression.
       stderr of both GATK calls is appended to the log output, so the log
       written by step 1 is preserved.
       detect_errors="exit_code": stderr is redirected to the log, so job
       failure is determined from the exit status of the chain.
       All dataset, reference and tool paths are quoted for the shell. -->
  <command detect_errors="exit_code">
    <![CDATA[
        ## call the .sh to untar the package; this redirect creates the log file
        bash '$__tool_directory__/mv_untar_gatk.sh' &> '$log' &&

        ## symlink the input dataset as input1.vcf for the GATK calls
        ln -s '$input1' input1.vcf &&

        ## GATK SelectVariants: extract the selected variant type
        java -jar "\$CONDA_PREFIX/../../GenomeAnalysisTK.jar"
        -T SelectVariants
        -R '$ref_file.fields.path'
        -V input1.vcf
        -selectType $TYPE
        -o variants_recal.indels.vcf
        2>> '$log' &&

        ## GATK VariantFiltration: apply the hard filter to the selected variants
        java -jar "\$CONDA_PREFIX/../../GenomeAnalysisTK.jar"
        -T VariantFiltration
        -R '$ref_file.fields.path'
        -V variants_recal.indels.vcf
        --filterExpression "DP<8 || QD < 2.0 || FS > 200.0 || ReadPosRankSum < -20.0"
        --filterName "filter_LQ_$TYPE"
        -o variants_recal.filtered.small.panel.region.vcf
        2>> '$log'
    ]]>
  </command>
  <!-- User-supplied parameters referenced by the command template. -->
  <inputs>
    <!-- VCF to process. Mandatory: the command always symlinks it to input1.vcf. -->
    <param format="vcf" name="input1" label="input VCF" type="data" />
    <!-- Reference genome chosen from the covacs_gatk_indexes data table;
         the command passes the entry's path field to GATK with -R. -->
    <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
      <options from_data_table="covacs_gatk_indexes">
        <filter type="sort_by" column="2" />
        <validator type="no_options" message="No indexes are available" />
      </options>
      <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
    </param>
    <!-- Variant class given to -selectType and embedded in the filter name.
         Mandatory (the command uses it unconditionally); SNP is preselected. -->
    <param name="TYPE" type="select" label="Variant type to select">
      <option value="INDEL">INDEL</option>
      <option value="SNP" selected="true">SNP</option>
    </param>
  </inputs>
  <!-- Datasets produced by a run of this tool. -->
  <outputs>
    <!-- SelectVariants result, collected from the job working directory. -->
    <data name="recal"
          format="vcf"
          label="SelectVariants on ${on_string} $TYPE :recal"
          from_work_dir="variants_recal.indels.vcf"/>
    <!-- VariantFiltration result, collected from the job working directory. -->
    <data name="filtered"
          format="vcf"
          label="VariantFiltration on ${on_string} $TYPE :recal"
          from_work_dir="variants_recal.filtered.small.panel.region.vcf"/>
    <!-- Log file; the command redirects into it directly via $log. -->
    <data name="log"
          format="txt"
          label="log"/>
  </outputs>
  <help>
**IMPORTANT** before this wrapper can be used, the admin user has to download GATK version 3.8 from the Broad Institute site https://software.broadinstitute.org/gatk/download/archive and then move it into the conda_prefix folder; the path of the conda_prefix is written in the galaxy.ini (or .yml) file

**More information** at https://software.broadinstitute.org/gatk/documentation/tooldocs/3.8-0/org_broadinstitute_gatk_tools_walkers_variantutils_SelectVariants.php and https://software.broadinstitute.org/gatk/documentation/tooldocs/3.8-0/org_broadinstitute_gatk_tools_walkers_filters_VariantFiltration.php

**Implemented options** SelectVariants <![CDATA[ &]]> VariantFiltration

-R 		Reference sequence file

-V 		vcf input

other options are fixed based on covacs pipeline

**Description** this step can be used if covacs_indel_snp does not have enough data to create the model; it calls SelectVariants and VariantFiltration, applying --filterExpression <![CDATA["DP<8 || QD < 2.0 || FS > 200.0 || ReadPosRankSum < -20.0"]]> to filter the variants (both SNPs and indels)

  </help>
  <!-- Publication to cite when this tool is used, referenced by DOI. -->
  <citations>
        <citation type="doi">10.1186/s12864-018-4508-1</citation>
  </citations>
</tool>