Mercurial > repos > devteam > analyze_covariates
diff analyze_covariates.xml @ 0:332a7de8f98a draft
Imported from capsule None
author | devteam |
---|---|
date | Tue, 01 Apr 2014 09:11:43 -0400 |
parents | |
children | e531148fb585 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/analyze_covariates.xml Tue Apr 01 09:11:43 2014 -0400 @@ -0,0 +1,101 @@ +<tool id="gatk_analyze_covariates" name="Analyze Covariates" version="0.0.5"> + <description>- draw plots</description> + <requirements> + <requirement type="package" version="1.4">gatk</requirement> + </requirements> + <macros> + <import>gatk_macros.xml</import> + </macros> + <command interpreter="python">gatk_wrapper.py + --max_jvm_heap_fraction "1" + --stdout "${output_log}" + --html_report_from_directory "${output_html}" "${output_html.files_path}" + -p 'java + -jar "${JAVA_JAR_PATH}/AnalyzeCovariates.jar" + -recalFile "${input_recal}" + -outputDir "${output_html.files_path}" + ##--num_threads 4 ##hard coded, for now + ##-log "${output_log}" + ##-Rscript,--path_to_Rscript path_to_Rscript; on path is good enough + #if $analysis_param_type.analysis_param_type_selector == "advanced": + --ignoreQ "${analysis_param_type.ignore_q}" + --numRG "${analysis_param_type.num_read_groups}" + --max_quality_score "${analysis_param_type.max_quality_score}" + --max_histogram_value "${analysis_param_type.max_histogram_value}" + ${analysis_param_type.do_indel_quality} + #end if + ' + </command> + <inputs> + <param name="input_recal" type="data" format="csv" label="Covariates table recalibration file" help="-recalFile,--recal_file &lt;recal_file&gt;" /> + <conditional name="analysis_param_type"> + <param name="analysis_param_type_selector" type="select" label="Basic or Advanced options"> + <option value="basic" selected="True">Basic</option> + <option value="advanced">Advanced</option> + </param> + <when value="basic"> + <!-- Do nothing here --> + </when> + <when value="advanced"> + <param name="ignore_q" type="integer" value="5" label="Ignore bases with reported quality less than this number." help="-ignoreQ,--ignoreQ &lt;ignoreQ&gt; "/> + <param name="num_read_groups" type="integer" value="-1" label="Only process N read groups." help="-numRG,--numRG &lt;numRG&gt;"/> + <param name="max_quality_score" type="integer" value="50" label="Max quality score" help="-maxQ,--max_quality_score &lt;max_quality_score&gt;"/> + <param name="max_histogram_value" type="integer" value="0" label="Max histogram value" help="-maxHist,--max_histogram_value &lt;max_histogram_value&gt;"/> + <param name="do_indel_quality" type="boolean" truevalue="--do_indel_quality" falsevalue="" label="Do indel quality" help="--do_indel_quality"/> + </when> + </conditional> + </inputs> + <outputs> + <data format="html" name="output_html" label="${tool.name} on ${on_string} (HTML)" /> + <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" /> + </outputs> + <tests> + <test> + <param name="input_recal" value="gatk/gatk_count_covariates/gatk_count_covariates_out_1.csv" ftype="csv" /> + <param name="analysis_param_type_selector" value="basic" /> + <output name="output_html" file="gatk/gatk_analyze_covariates/gatk_analyze_covariates_out_1.html" /> + <output name="output_log" file="gatk/gatk_analyze_covariates/gatk_analyze_covariates_out_1.log.contains" compare="contains" /> + </test> + </tests> + <help> +**What it does** + +Create collapsed versions of the recal csv file and call R scripts to plot residual error versus the various covariates. + +For more information on base quality score recalibration using the GATK, see this `tool specific page <http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration>`_. + +To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_. + +If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_. + +------ + +**Inputs** + +GenomeAnalysisTK: AnalyzeCovariates accepts an recal CSV file. + + +**Outputs** + +The output is in CSV and HTML files with links to PDF graphs and a data files. + + +Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats. + +------- + +**Settings**:: + + recal_file The input recal csv file to analyze + output_dir The directory in which to output all the plots and intermediate data files + path_to_Rscript The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript + path_to_resources Path to resources folder holding the Sting R scripts. + ignoreQ Ignore bases with reported quality less than this number. + numRG Only process N read groups. Default value: -1 (process all read groups) + max_quality_score The integer value at which to cap the quality scores, default is 50 + max_histogram_value If supplied, this value will be the max value of the histogram plots + do_indel_quality If supplied, this value will be the max value of the histogram plots + +@CITATION_SECTION@ + </help> +</tool>