0
|
1 <tool id="gatk_analyze_covariates" name="Analyze Covariates" version="0.0.5">
|
|
2 <description>- draw plots</description>
|
|
3 <requirements>
|
|
4 <requirement type="package" version="1.4">gatk</requirement>
|
|
5 </requirements>
|
|
6 <macros>
|
|
7 <import>gatk_macros.xml</import>
|
|
8 </macros>
|
|
<!-- Command template: runs gatk_wrapper.py under python. The wrapper receives the log dataset path (stdout option), the HTML dataset plus its files_path (html_report_from_directory option), and, via -p, the quoted java AnalyzeCovariates command line. The ignoreQ / numRG / max_quality_score / max_histogram_value / do_indel_quality arguments are emitted only when analysis_param_type_selector is "advanced". Lines starting with a double hash are template comments kept as notes on options that are not passed. -->
9 <command interpreter="python">gatk_wrapper.py
|
|
10 --max_jvm_heap_fraction "1"
|
|
11 --stdout "${output_log}"
|
|
12 --html_report_from_directory "${output_html}" "${output_html.files_path}"
|
|
13 -p 'java
|
|
14 -jar "${JAVA_JAR_PATH}/AnalyzeCovariates.jar"
|
|
15 -recalFile "${input_recal}"
|
|
16 -outputDir "${output_html.files_path}"
|
|
17 ##--num_threads 4 ##hard coded, for now
|
|
18 ##-log "${output_log}"
|
|
19 ##-Rscript,--path_to_Rscript path_to_Rscript; on path is good enough
|
|
20 #if $analysis_param_type.analysis_param_type_selector == "advanced":
|
|
21 --ignoreQ "${analysis_param_type.ignore_q}"
|
|
22 --numRG "${analysis_param_type.num_read_groups}"
|
|
23 --max_quality_score "${analysis_param_type.max_quality_score}"
|
|
24 --max_histogram_value "${analysis_param_type.max_histogram_value}"
|
|
25 ${analysis_param_type.do_indel_quality}
|
|
26 #end if
|
|
27 '
|
|
28 </command>
|
|
<!-- Inputs: one CSV recalibration dataset (input_recal) and the analysis_param_type conditional. The "basic" branch adds no parameters; the "advanced" branch exposes ignore_q, num_read_groups, max_quality_score, max_histogram_value (integers) and the do_indel_quality boolean, whose truevalue is the literal command-line flag inserted by the command template. -->
29 <inputs>
|
|
30 <param name="input_recal" type="data" format="csv" label="Covariates table recalibration file" help="-recalFile,--recal_file &lt;recal_file&gt;" />
|
|
31 <conditional name="analysis_param_type">
|
|
32 <param name="analysis_param_type_selector" type="select" label="Basic or Advanced options">
|
|
33 <option value="basic" selected="True">Basic</option>
|
|
34 <option value="advanced">Advanced</option>
|
|
35 </param>
|
|
36 <when value="basic">
|
|
37 <!-- Do nothing here -->
|
|
38 </when>
|
|
39 <when value="advanced">
|
|
40 <param name="ignore_q" type="integer" value="5" label="Ignore bases with reported quality less than this number." help="-ignoreQ,--ignoreQ &lt;ignoreQ&gt; "/>
|
|
41 <param name="num_read_groups" type="integer" value="-1" label="Only process N read groups." help="-numRG,--numRG &lt;numRG&gt;"/>
|
|
42 <param name="max_quality_score" type="integer" value="50" label="Max quality score" help="-maxQ,--max_quality_score &lt;max_quality_score&gt;"/>
|
|
43 <param name="max_histogram_value" type="integer" value="0" label="Max histogram value" help="-maxHist,--max_histogram_value &lt;max_histogram_value&gt;"/>
|
|
44 <param name="do_indel_quality" type="boolean" truevalue="--do_indel_quality" falsevalue="" label="Do indel quality" help="--do_indel_quality"/>
|
|
45 </when>
|
|
46 </conditional>
|
|
47 </inputs>
|
|
<!-- Outputs: output_html (format html) is the report dataset whose files_path is also used as the AnalyzeCovariates output directory in the command template; output_log (format txt) is the file the wrapper is given through its stdout option. -->
48 <outputs>
|
|
49 <data format="html" name="output_html" label="${tool.name} on ${on_string} (HTML)" />
|
|
50 <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
|
|
51 </outputs>
|
|
<!-- Single functional test: feeds a count-covariates CSV fixture with the selector set to "basic". output_html is checked against an expected HTML file with no compare attribute set; output_log is checked with compare="contains". -->
52 <tests>
|
|
|
53 <test>
|
|
54 <param name="input_recal" value="gatk/gatk_count_covariates/gatk_count_covariates_out_1.csv" ftype="csv" />
|
|
55 <param name="analysis_param_type_selector" value="basic" />
|
|
56 <output name="output_html" file="gatk/gatk_analyze_covariates/gatk_analyze_covariates_out_1.html" />
|
|
57 <output name="output_log" file="gatk/gatk_analyze_covariates/gatk_analyze_covariates_out_1.log.contains" compare="contains" />
|
|
58 </test>
|
|
59 </tests>
|
|
60 <help>
|
|
61 **What it does**
|
|
62
|
|
63 Create collapsed versions of the recal csv file and call R scripts to plot residual error versus the various covariates.
|
|
64
|
|
65 For more information on base quality score recalibration using the GATK, see this `tool specific page <http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration>`_.
|
|
66
|
|
67 To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_.
|
|
68
|
|
69 If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_.
|
|
70
|
|
71 ------
|
|
72
|
|
73 **Inputs**
|
|
74
|
|
75 GenomeAnalysisTK: AnalyzeCovariates accepts a recal CSV file.
|
|
76
|
|
77
|
|
78 **Outputs**
|
|
79
|
|
80 The output consists of CSV and HTML files, with links to PDF graphs and data files.
|
|
81
|
|
82
|
|
83 Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats.
|
|
84
|
|
85 -------
|
|
86
|
|
87 **Settings**::
|
|
88
|
|
89 recal_file The input recal csv file to analyze
|
|
90 output_dir The directory in which to output all the plots and intermediate data files
|
|
91 path_to_Rscript The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript
|
|
92 path_to_resources Path to resources folder holding the Sting R scripts.
|
|
93 ignoreQ Ignore bases with reported quality less than this number.
|
|
94 numRG Only process N read groups. Default value: -1 (process all read groups)
|
|
95 max_quality_score The integer value at which to cap the quality scores, default is 50
|
|
96 max_histogram_value If supplied, this value will be the max value of the histogram plots
|
|
97 do_indel_quality Boolean flag that takes no value; when set, --do_indel_quality is passed to AnalyzeCovariates
|
|
98
|
|
99 @CITATION_SECTION@
|
|
100 </help>
|
|
101 </tool>
|