# HG changeset patch # User david-hoover # Date 1347486886 14400 # Node ID 432aafa6830aae48f284b59f1d922e73d06769c7 # Parent b138c556923173dac86ec128591f8fffc84df202 Uploaded diff -r b138c5569231 -r 432aafa6830a README --- a/README Wed Sep 12 12:38:51 2012 -0400 +++ b/README Wed Sep 12 17:54:46 2012 -0400 @@ -5,5 +5,8 @@ Additionally, copies of or links to the GenomeAnalysisTK.jar and key file must be made within the directory tool-data/shared/jars/gatk2. - ${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk2/GenomeAnalysisTK.jar - ${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk2/gatk2_key_file + cd ${GALAXY_DATA_INDEX_DIR}/shared/jars + mkdir gatk2 + cd gatk2 + ln -s /path/to/wherever/GenomeAnalysisTK.jar . + ln -s /path/to/wherever/key.file gatk2_key_file diff -r b138c5569231 -r 432aafa6830a count_covariates.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/count_covariates.xml Wed Sep 12 17:54:46 2012 -0400 @@ -0,0 +1,576 @@ + + on BAM files + + gatk + samtools + + gatk2_wrapper.py + --max_jvm_heap_fraction "1" + --stdout "${output_log}" + -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input" + #if str( $reference_source.input_bam.metadata.bam_index ) != "None": + -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index + #end if + -p 'java + -jar "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/GenomeAnalysisTK.jar" + -T "CountCovariates" + --num_threads 4 ##hard coded, for now + -et "NO_ET" -K "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/gatk2_key_file" ##ET no phone home + ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout + #if $reference_source.reference_source_selector != "history": + -R "${reference_source.ref_file.fields.path}" + #end if + --recal_file "${output_recal}" + ${standard_covs} + #if str( $covariates ) != "None": + #for $cov in str( $covariates ).split( ',' ): + -cov "${cov}" + #end for + #end if + ' + + #set 
$snp_dataset_provided = False + #set $rod_binding_names = dict() + #for $rod_binding in $rod_bind: + #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom': + #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name + #else + #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector + #end if + #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'dbsnp': + #set $snp_dataset_provided = True + #end if + #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1 + -d "--knownSites:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" + #end for + + ##start standard gatk options + #if $gatk_param_type.gatk_param_type_selector == "advanced": + #for $pedigree in $gatk_param_type.pedigree: + -p '--pedigree "${pedigree.pedigree_file}"' + #end for + #for $pedigree_string in $gatk_param_type.pedigree_string_repeat: + -p '--pedigreeString "${pedigree_string.pedigree_string}"' + #end for + -p '--pedigreeValidationType "${gatk_param_type.pedigree_validation_type}"' + #for $read_filter in $gatk_param_type.read_filter: + -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}" + ###raise Exception( str( dir( $read_filter ) ) ) + #for $name, $param in $read_filter.read_filter_type.iteritems(): + #if $name not in [ "__current_case__", "read_filter_type_selector" ]: + #if hasattr( $param.input, 'truevalue' ): + ${param} + #else: + --${name} "${param}" + #end if + #end if + #end for + ' + #end for + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_interval_repeat ): + -d "--intervals" "${input_intervals.input_intervals}" "${input_intervals.input_intervals.ext}" "input_intervals_${interval_count}" + #end for + + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_exclude_interval_repeat ): + -d 
"--excludeIntervals" "${input_intervals.input_exclude_intervals}" "${input_intervals.input_exclude_intervals.ext}" "input_exlude_intervals_${interval_count}" + #end for + + -p '--interval_set_rule "${gatk_param_type.interval_set_rule}"' + + -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"' + #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE": + -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"' + #end if + -p ' + --baq "${gatk_param_type.baq}" + --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}" + ${gatk_param_type.use_original_qualities} + --defaultBaseQualities "${gatk_param_type.default_base_qualities}" + --validation_strictness "${gatk_param_type.validation_strictness}" + --interval_merging "${gatk_param_type.interval_merging}" + ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.non_deterministic_random_seed} + ' + #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): + #if $rg_black_list.read_group_black_list_type.read_group_black_list_type_selector == "file": + -d "--read_group_black_list" "${rg_black_list.read_group_black_list_type.read_group_black_list}" "txt" "input_read_group_black_list_${rg_black_list_count}" + #else + -p '--read_group_black_list "${rg_black_list.read_group_black_list_type.read_group_black_list}"' + #end if + #end for + #end if + + #if str( $reference_source.reference_source_selector ) == "history": + -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input" + #end if + ##end standard gatk options + + ##start analysis specific options + #if $analysis_param_type.analysis_param_type_selector == "advanced": + -p ' + #if $analysis_param_type.default_read_group_type.default_read_group_type_selector == "set": + --default_read_group 
"${analysis_param_type.default_read_group_type.default_read_group}" + #end if + #if str( $analysis_param_type.default_platform ) != "default": + --default_platform "${analysis_param_type.default_platform}" + #end if + #if str( $analysis_param_type.force_read_group_type.force_read_group_type_selector ) == "set": + --force_read_group "${analysis_param_type.force_read_group_type.force_read_group}" + #end if + #if str( $analysis_param_type.force_platform ) != "default": + --force_platform "${analysis_param_type.force_platform}" + #end if + ${analysis_param_type.exception_if_no_tile} + #if str( $analysis_param_type.solid_options_type.solid_options_type_selector ) == "set": + #if str( $analysis_param_type.solid_options_type.solid_recal_mode ) != "default": + --solid_recal_mode "${analysis_param_type.solid_options_type.solid_recal_mode}" + #end if + #if str( $analysis_param_type.solid_options_type.solid_nocall_strategy ) != "default": + --solid_nocall_strategy "${analysis_param_type.solid_options_type.solid_nocall_strategy}" + #end if + #end if + --window_size_nqs "${analysis_param_type.window_size_nqs}" + --homopolymer_nback "${analysis_param_type.homopolymer_nback}" + ' + #end if + #if not $snp_dataset_provided: + -p '--run_without_dbsnp_potentially_ruining_quality' + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +.. class:: warningmark + +"This calculation is critically dependent on being able to skip over known variant sites. Please provide a dbSNP ROD or a VCF file containing known sites of genetic variation." +However, if you do not provide this file, the '--run_without_dbsnp_potentially_ruining_quality' flag will be automatically used, and the command will be allowed to run. + +**What it does** + +This walker is designed to work as the first pass in a two-pass processing step. It does a by-locus traversal operating only at sites that are not in dbSNP. 
We assume that all reference mismatches we see are therefore errors and indicative of poor base quality. This walker generates tables based on various user-specified covariates (such as read group, reported quality score, cycle, and dinucleotide). Since there is a large amount of data, one can then calculate an empirical probability of error given the particular covariates seen at this site, where p(error) = num mismatches / num observations. The output file is a CSV list of (the several covariate values, num observations, num mismatches, empirical quality score). The first non-comment line of the output file gives the names of the covariates that were used for this calculation. Note: ReadGroupCovariate and QualityScoreCovariate are required covariates and will be added for the user regardless of whether or not they were specified. Note: This walker is designed to be used in conjunction with TableRecalibrationWalker. + +For more information on base quality score recalibration using the GATK, see this `tool specific page <http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration>`_. + +To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_. + +If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_. + +------ + +**Inputs** + +GenomeAnalysisTK: CountCovariates accepts an aligned BAM input file. + + +**Outputs** + +The output is in CSV format. + + +Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats. + +------- + +**Settings**:: + + + default_read_group If a read has no read group then default to the provided String. + default_platform If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid. 
+ force_read_group If provided, the read group ID of EVERY read will be forced to be the provided String. This is useful to collapse all data into a single read group. + force_platform If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid. + window_size_nqs The window size used by MinimumNQSCovariate for its calculation + homopolymer_nback The number of previous bases to look at in HomopolymerCovariate + exception_if_no_tile If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1 + solid_recal_mode How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS (DO_NOTHING|SET_Q_ZERO|SET_Q_ZERO_BASE_N|REMOVE_REF_BIAS) + solid_nocall_strategy Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ (THROW_EXCEPTION|LEAVE_READ_UNRECALIBRATED|PURGE_READ) + recal_file Filename for the input covariates table recalibration .csv file + out The output CSV file + recal_file Filename for the outputted covariates table recalibration file + standard_covs Use the standard set of covariates in addition to the ones listed using the -cov argument + run_without_dbsnp_potentially_ruining_quality If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only. + +------ + +**Citation** + +For the underlying tool, please cite `DePristo MA, Banks E, Poplin R, Garimella KV, Maguire JR, Hartl C, Philippakis AA, del Angel G, Rivas MA, Hanna M, McKenna A, Fennell TJ, Kernytsky AM, Sivachenko AY, Cibulskis K, Gabriel SB, Altshuler D, Daly MJ. A framework for variation discovery and genotyping using next-generation DNA sequencing data. Nat Genet. 2011 May;43(5):491-8. 
<http://www.ncbi.nlm.nih.gov/pubmed/21478889>`_ + +If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* + + + diff -r b138c5569231 -r 432aafa6830a depth_of_coverage.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/depth_of_coverage.xml Wed Sep 12 17:54:46 2012 -0400 @@ -0,0 +1,1027 @@ + + on BAM files + + gatk + samtools + + gatk2_wrapper.py + --max_jvm_heap_fraction "1" + --stdout "${output_log}" + #for $i, $input_bam in enumerate( $reference_source.input_bams ): + -d "-I" "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "gatk_input_${i}" + #if str( $input_bam.input_bam.metadata.bam_index ) != "None": + -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "gatk_input_${i}" ##hardcode galaxy ext type as bam_index + #end if + #end for + -p 'java + -jar "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/GenomeAnalysisTK.jar" + -T "DepthOfCoverage" + ##--num_threads 4 ##hard coded, for now + + -et "NO_ET" -K "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/gatk2_key_file" ##ET no phone home + #if $reference_source.reference_source_selector != "history": + -R "${reference_source.ref_file.fields.path}" + #end if + #if str( $input_calculate_coverage_over_genes ) != "None": + --calculateCoverageOverGenes "${input_calculate_coverage_over_genes}" + #end if + #if str( $partition_type ) != "None": + #for $pt in str( $partition_type ).split( ',' ): + --partitionType "${pt}" + #end for + #end if + --out "${output_per_locus_coverage}" + + #for $ct_group in $summary_coverage_threshold_group: + --summaryCoverageThreshold "${ct_group.summary_coverage_threshold}" + #end for + --outputFormat "${output_format}" + ' + + ##start standard gatk options + #if $gatk_param_type.gatk_param_type_selector == "advanced": + #for $pedigree in $gatk_param_type.pedigree: + -p '--pedigree "${pedigree.pedigree_file}"' + #end for + #for $pedigree_string in $gatk_param_type.pedigree_string_repeat: + -p '--pedigreeString 
"${pedigree_string.pedigree_string}"' + #end for + -p '--pedigreeValidationType "${gatk_param_type.pedigree_validation_type}"' + #for $read_filter in $gatk_param_type.read_filter: + -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}" + ###raise Exception( str( dir( $read_filter ) ) ) + #for $name, $param in $read_filter.read_filter_type.iteritems(): + #if $name not in [ "__current_case__", "read_filter_type_selector" ]: + #if hasattr( $param.input, 'truevalue' ): + ${param} + #else: + --${name} "${param}" + #end if + #end if + #end for + ' + #end for + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_interval_repeat ): + -d "--intervals" "${input_intervals.input_intervals}" "${input_intervals.input_intervals.ext}" "input_intervals_${interval_count}" + #end for + + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_exclude_interval_repeat ): + -d "--excludeIntervals" "${input_intervals.input_exclude_intervals}" "${input_intervals.input_exclude_intervals.ext}" "input_exlude_intervals_${interval_count}" + #end for + + -p '--interval_set_rule "${gatk_param_type.interval_set_rule}"' + + -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"' + #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE": + -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"' + #end if + -p ' + --baq "${gatk_param_type.baq}" + --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}" + ${gatk_param_type.use_original_qualities} + --defaultBaseQualities "${gatk_param_type.default_base_qualities}" + --validation_strictness "${gatk_param_type.validation_strictness}" + --interval_merging "${gatk_param_type.interval_merging}" + ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.non_deterministic_random_seed} + ' + 
#for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): + #if $rg_black_list.read_group_black_list_type.read_group_black_list_type_selector == "file": + -d "--read_group_black_list" "${rg_black_list.read_group_black_list_type.read_group_black_list}" "txt" "input_read_group_black_list_${rg_black_list_count}" + #else + -p '--read_group_black_list "${rg_black_list.read_group_black_list_type.read_group_black_list}"' + #end if + #end for + #end if + + #if $reference_source.reference_source_selector == "history": + -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input" + #end if + ##end standard gatk options + ##start analysis specific options + #if $analysis_param_type.analysis_param_type_selector == "advanced": + -p ' + ${analysis_param_type.ignore_deletion_sites} + ${analysis_param_type.include_deletions} + --maxBaseQuality "${analysis_param_type.max_base_quality}" + --maxMappingQuality "${analysis_param_type.max_mapping_quality}" + --minBaseQuality "${analysis_param_type.min_base_quality}" + --minMappingQuality "${analysis_param_type.min_mapping_quality}" + --nBins "${analysis_param_type.n_bins}" + ${analysis_param_type.omit_depth_output_at_each_base} + ${analysis_param_type.omit_interval_statistics} + ${analysis_param_type.omit_locus_table} + ${analysis_param_type.omit_per_sample_stats} + ${analysis_param_type.print_base_counts} + ${analysis_param_type.print_bin_endpoints_and_exit} + --start "${analysis_param_type.start}" + --stop "${analysis_param_type.stop}" + ' + #end if + ##Move additional files to final location + #if str( $partition_type ) != "None": + #set $partition_types = str( $partition_type ).split( ',' ) + #else: + #set $partition_types = [ 'sample' ] + #end if + #if 'sample' in $partition_types and ( str( $analysis_param_type.analysis_param_type_selector ) == "basic" or str( $analysis_param_type.print_bin_endpoints_and_exit ) == "" ): + #if str( 
$analysis_param_type.analysis_param_type_selector ) == "basic" or str( $analysis_param_type.omit_per_sample_stats ) == "": + && mv ${output_per_locus_coverage}.sample_summary ${output_summary_sample} + && mv ${output_per_locus_coverage}.sample_statistics ${output_statistics_sample} + #end if + #if $gatk_param_type.gatk_param_type_selector == "advanced" and len( $gatk_param_type.input_interval_repeat ) and ( str( $analysis_param_type.analysis_param_type_selector ) == "basic" or str( $analysis_param_type.omit_interval_statistics ) == "" ): + && mv ${output_per_locus_coverage}.sample_interval_summary ${output_interval_summary_sample} + && mv ${output_per_locus_coverage}.sample_interval_statistics ${output_interval_statistics_sample} + #end if + #if str( $input_calculate_coverage_over_genes ) != "None": + && mv ${output_per_locus_coverage}.sample_gene_summary ${output_gene_summary_sample} + && mv ${output_per_locus_coverage}.sample_gene_statistics ${output_gene_statistics_sample} + #end if + #if str( $analysis_param_type.analysis_param_type_selector ) == "basic" or str( $analysis_param_type.omit_depth_output_at_each_base ) == "": + && mv ${output_per_locus_coverage}.sample_cumulative_coverage_counts ${output_cumulative_coverage_counts_sample} + && mv ${output_per_locus_coverage}.sample_cumulative_coverage_proportions ${output_cumulative_coverage_proportions_sample} + #end if + #end if + + #if 'readgroup' in $partition_types and ( str( $analysis_param_type.analysis_param_type_selector ) == "basic" or str( $analysis_param_type.print_bin_endpoints_and_exit ) == "" ): + #if str( $analysis_param_type.analysis_param_type_selector ) == "basic" or str( $analysis_param_type.omit_per_sample_stats ) == "": + && mv ${output_per_locus_coverage}.read_group_summary ${output_summary_readgroup} + && mv ${output_per_locus_coverage}.read_group_statistics ${output_statistics_readgroup} + #end if + #if $gatk_param_type.gatk_param_type_selector == "advanced" and len( 
$gatk_param_type.input_interval_repeat ) and ( str( $analysis_param_type.analysis_param_type_selector ) == "basic" or str( $analysis_param_type.omit_interval_statistics ) == "" ): + && mv ${output_per_locus_coverage}.read_group_interval_summary ${output_interval_summary_readgroup} + && mv ${output_per_locus_coverage}.read_group_interval_statistics ${output_interval_statistics_readgroup} + #end if + #if str( $input_calculate_coverage_over_genes ) != "None": + && mv ${output_per_locus_coverage}.read_group_gene_summary ${output_gene_summary_readgroup} + && mv ${output_per_locus_coverage}.read_group_gene_statistics ${output_gene_statistics_readgroup} + #end if + #if str( $analysis_param_type.analysis_param_type_selector ) == "basic" or str( $analysis_param_type.omit_depth_output_at_each_base ) == "": + && mv ${output_per_locus_coverage}.read_group_cumulative_coverage_counts ${output_cumulative_coverage_counts_readgroup} + && mv ${output_per_locus_coverage}.read_group_cumulative_coverage_proportions ${output_cumulative_coverage_proportions_readgroup} + #end if + #end if + + #if 'library' in $partition_types and ( str( $analysis_param_type.analysis_param_type_selector ) == "basic" or str( $analysis_param_type.print_bin_endpoints_and_exit ) == "" ): + #if str( $analysis_param_type.analysis_param_type_selector ) == "basic" or str( $analysis_param_type.omit_per_sample_stats ) == "": + && mv ${output_per_locus_coverage}.library_summary ${output_summary_library} + && mv ${output_per_locus_coverage}.library_statistics ${output_statistics_library} + #end if + #if $gatk_param_type.gatk_param_type_selector == "advanced" and len( $gatk_param_type.input_interval_repeat ) and ( str( $analysis_param_type.analysis_param_type_selector ) == "basic" or str( $analysis_param_type.omit_interval_statistics ) == "" ): + && mv ${output_per_locus_coverage}.library_interval_summary ${output_interval_summary_library} + && mv ${output_per_locus_coverage}.library_interval_statistics 
${output_interval_statistics_library} + #end if + #if str( $input_calculate_coverage_over_genes ) != "None": + && mv ${output_per_locus_coverage}.library_gene_summary ${output_gene_summary_library} + && mv ${output_per_locus_coverage}.library_gene_statistics ${output_gene_statistics_library} + #end if + #if str( $analysis_param_type.analysis_param_type_selector ) == "basic" or str( $analysis_param_type.omit_depth_output_at_each_base ) == "": + && mv ${output_per_locus_coverage}.library_cumulative_coverage_counts ${output_cumulative_coverage_counts_library} + && mv ${output_per_locus_coverage}.library_cumulative_coverage_proportions ${output_cumulative_coverage_proportions_library} + #end if + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['omit_per_sample_stats'] == False + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + 'sample' in partition_type or not partition_type + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['omit_per_sample_stats'] == False + analysis_param_type['analysis_param_type_selector'] == "basic" or 
analysis_param_type['print_bin_endpoints_and_exit'] == False + 'sample' in partition_type or not partition_type + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + 'sample' in partition_type or not partition_type + gatk_param_type['gatk_param_type_selector'] == "advanced" and len( gatk_param_type['input_interval_repeat'] ) + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['omit_interval_statistics'] == False + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + 'sample' in partition_type or not partition_type + gatk_param_type['gatk_param_type_selector'] == "advanced" and len( gatk_param_type['input_interval_repeat'] ) + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['omit_interval_statistics'] == False + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + input_calculate_coverage_over_genes is not None and ( 'sample' in partition_type or not partition_type ) + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + input_calculate_coverage_over_genes is not None and ( 'sample' in partition_type or not partition_type ) + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['omit_depth_output_at_each_base'] == False + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + 'sample' in partition_type or not partition_type + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or 
analysis_param_type['omit_depth_output_at_each_base'] == False + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + 'sample' in partition_type or not partition_type + + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['omit_per_sample_stats'] == False + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + 'readgroup' in partition_type + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['omit_per_sample_stats'] == False + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + 'readgroup' in partition_type + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + 'readgroup' in partition_type + gatk_param_type['gatk_param_type_selector'] == "advanced" and len( gatk_param_type['input_interval_repeat'] ) + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['omit_interval_statistics'] == False + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + 'readgroup' in partition_type + gatk_param_type['gatk_param_type_selector'] == "advanced" and len( gatk_param_type['input_interval_repeat'] ) + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['omit_interval_statistics'] == False + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + input_calculate_coverage_over_genes is not None and 'readgroup' in partition_type + + + + + + + + + 
+ + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + input_calculate_coverage_over_genes is not None and 'readgroup' in partition_type + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['omit_depth_output_at_each_base'] == False + 'readgroup' in partition_type + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['omit_depth_output_at_each_base'] == False + 'readgroup' in partition_type + + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['omit_per_sample_stats'] == False + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + 'library' in partition_type + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['omit_per_sample_stats'] == False + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + 'library' in partition_type + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + 'library' in partition_type + gatk_param_type['gatk_param_type_selector'] == "advanced" and len( gatk_param_type['input_interval_repeat'] ) + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['omit_interval_statistics'] == False + + + + + + + + + + + + + + + 
analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + 'library' in partition_type + gatk_param_type['gatk_param_type_selector'] == "advanced" and len( gatk_param_type['input_interval_repeat'] ) + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['omit_interval_statistics'] == False + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + input_calculate_coverage_over_genes is not None and 'library' in partition_type + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + input_calculate_coverage_over_genes is not None and 'library' in partition_type + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['omit_depth_output_at_each_base'] == False + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + 'library' in partition_type + + + + + + + + + + + + + + + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['omit_depth_output_at_each_base'] == False + analysis_param_type['analysis_param_type_selector'] == "basic" or analysis_param_type['print_bin_endpoints_and_exit'] == False + 'library' in partition_type + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +DepthOfCoverage processes a set of bam files to determine coverage at different levels of partitioning and aggregation. 
Coverage can be analyzed per locus, per interval, per gene, or in total; can be partitioned by sample, by read group, by technology, by center, or by library; and can be summarized by mean, median, quartiles, and/or percentage of bases covered to or beyond a threshold. Additionally, reads and bases can be filtered by mapping or base quality score. + +For more information on the GATK Depth of Coverage, see this `tool specific page <http://www.broadinstitute.org/gsa/wiki/index.php/Depth_of_Coverage>`_. + +To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_. + +If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_. + +------ + +**Inputs** + +GenomeAnalysisTK: DepthOfCoverage accepts aligned BAM input files. + + +**Outputs** + +The output is in various table formats. + + +Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats. + +------- + +**Settings**:: + + calculateCoverageOverGenes File NA Calculate the coverage statistics over this list of genes. Currently accepts RefSeq. + ignoreDeletionSites boolean false Ignore sites consisting only of deletions + includeDeletions boolean false Include information on deletions + maxBaseQuality byte 127 Maximum quality of bases to count towards depth. Defaults to 127 (Byte.MAX_VALUE). + maxMappingQuality int 2147483647 Maximum mapping quality of reads to count towards depth. Defaults to 2^31-1 (Integer.MAX_VALUE). + minBaseQuality byte -1 Minimum quality of bases to count towards depth. Defaults to -1. + minMappingQuality int -1 Minimum mapping quality of reads to count towards depth. Defaults to -1. 
+ nBins int 499 Number of bins to use for granular binning + omitDepthOutputAtEachBase boolean false Will omit the output of the depth of coverage at each base, which should result in speedup + omitIntervalStatistics boolean false Will omit the per-interval statistics section, which should result in speedup + omitLocusTable boolean false Will not calculate the per-sample per-depth counts of loci, which should result in speedup + omitPerSampleStats boolean false Omits the summary files per-sample. These statistics are still calculated, so this argument will not improve runtime. + outputFormat String rtable the format of the output file (e.g. csv, table, rtable); defaults to r-readable table + partitionType Set[Partition] [sample] Partition type for depth of coverage. Defaults to sample. Can be any combination of sample, readgroup, library. + printBaseCounts boolean false Will add base counts to per-locus output. + printBinEndpointsAndExit boolean false Prints the bin values and exits immediately. Use to calibrate what bins you want before running on data. + start int 1 Starting (left endpoint) for granular binning + stop int 500 Ending (right endpoint) for granular binning + summaryCoverageThreshold int[] [15] for summary file outputs, report the % of bases covered to >= this number. Defaults to 15; can take multiple arguments. + +------ + +**Citation** + +For the underlying tool, please cite `DePristo MA, Banks E, Poplin R, Garimella KV, Maguire JR, Hartl C, Philippakis AA, del Angel G, Rivas MA, Hanna M, McKenna A, Fennell TJ, Kernytsky AM, Sivachenko AY, Cibulskis K, Gabriel SB, Altshuler D, Daly MJ. A framework for variation discovery and genotyping using next-generation DNA sequencing data. Nat Genet. 2011 May;43(5):491-8. <http://www.ncbi.nlm.nih.gov/pubmed/21478889>`_ + +If you use this tool in Galaxy, please cite Blankenberg D, et al. 
*In preparation.* + + + diff -r b138c5569231 -r 432aafa6830a indel_realigner.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/indel_realigner.xml Wed Sep 12 17:54:46 2012 -0400 @@ -0,0 +1,492 @@ + + - perform local realignment + + gatk + samtools + + gatk2_wrapper.py + --max_jvm_heap_fraction "1" + --stdout "${output_log}" + -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input" + #if str( $reference_source.input_bam.metadata.bam_index ) != "None": + -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index + #end if + -p 'java + -jar "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/GenomeAnalysisTK.jar" + -T "IndelRealigner" + -o "${output_bam}" + -et "NO_ET" -K "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/gatk2_key_file" ##ET no phone home + ##--num_threads 4 ##hard coded, for now + ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout + #if $reference_source.reference_source_selector != "history": + -R "${reference_source.ref_file.fields.path}" + #end if + -LOD "${lod_threshold}" + ${knowns_only} + ' + + #set $rod_binding_names = dict() + #for $rod_binding in $rod_bind: + #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom': + #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name + #else + #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector + #end if + #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1 + -d "-known:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" + #end for + + ##start standard gatk options + #if $gatk_param_type.gatk_param_type_selector == "advanced": + #for $pedigree in $gatk_param_type.pedigree: + -p '--pedigree "${pedigree.pedigree_file}"' + #end for + #for $pedigree_string in 
$gatk_param_type.pedigree_string_repeat: + -p '--pedigreeString "${pedigree_string.pedigree_string}"' + #end for + -p '--pedigreeValidationType "${gatk_param_type.pedigree_validation_type}"' + #for $read_filter in $gatk_param_type.read_filter: + -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}" + ###raise Exception( str( dir( $read_filter ) ) ) + #for $name, $param in $read_filter.read_filter_type.iteritems(): + #if $name not in [ "__current_case__", "read_filter_type_selector" ]: + #if hasattr( $param.input, 'truevalue' ): + ${param} + #else: + --${name} "${param}" + #end if + #end if + #end for + ' + #end for + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_interval_repeat ): + -d "--intervals" "${input_intervals.input_intervals}" "${input_intervals.input_intervals.ext}" "input_intervals_${interval_count}" + #end for + + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_exclude_interval_repeat ): + -d "--excludeIntervals" "${input_intervals.input_exclude_intervals}" "${input_intervals.input_exclude_intervals.ext}" "input_exlude_intervals_${interval_count}" + #end for + + -p '--interval_set_rule "${gatk_param_type.interval_set_rule}"' + + -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"' + #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE": + -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"' + #end if + -p ' + --baq "${gatk_param_type.baq}" + --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}" + ${gatk_param_type.use_original_qualities} + --defaultBaseQualities "${gatk_param_type.default_base_qualities}" + --validation_strictness "${gatk_param_type.validation_strictness}" + --interval_merging "${gatk_param_type.interval_merging}" + 
${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.non_deterministic_random_seed} + ' + #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): + #if $rg_black_list.read_group_black_list_type.read_group_black_list_type_selector == "file": + -d "--read_group_black_list" "${rg_black_list.read_group_black_list_type.read_group_black_list}" "txt" "input_read_group_black_list_${rg_black_list_count}" + #else + -p '--read_group_black_list "${rg_black_list.read_group_black_list_type.read_group_black_list}"' + #end if + #end for + #end if + #if $reference_source.reference_source_selector == "history": + -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input" + #end if + ##end standard gatk options + ##start analysis specific options + -d "-targetIntervals" "${target_intervals}" "${target_intervals.ext}" "gatk_target_intervals" + -p ' + --disable_bam_indexing + ' + #if $analysis_param_type.analysis_param_type_selector == "advanced": + -p ' + --entropyThreshold "${analysis_param_type.entropy_threshold}" + ${analysis_param_type.simplify_bam} + --consensusDeterminationModel "${analysis_param_type.consensus_determination_model}" + --maxIsizeForMovement "${analysis_param_type.max_insert_size_for_movement}" + --maxPositionalMoveAllowed "${analysis_param_type.max_positional_move_allowed}" + --maxConsensuses "${analysis_param_type.max_consensuses}" + --maxReadsForConsensuses "${analysis_param_type.max_reads_for_consensuses}" + --maxReadsForRealignment "${analysis_param_type.max_reads_for_realignment}" + ${analysis_param_type.no_original_alignment_tags} + ' + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Performs local realignment of reads based on misalignments due to the presence of indels. Unlike most mappers, this walker uses the full alignment context to determine whether an appropriate alternate reference (i.e. indel) exists and updates SAMRecords accordingly. + +For more information on local realignment around indels using the GATK, see this `tool specific page <http://www.broadinstitute.org/gsa/wiki/index.php/Local_realignment_around_indels>`_. + +To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_. + +If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_. + +------ + +**Inputs** + +GenomeAnalysisTK: IndelRealigner accepts an aligned BAM and a list of intervals to realign as input files. + + +**Outputs** + +The output is in the BAM format. + + +Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats. + +------- + +**Settings**:: + + targetIntervals intervals file output from RealignerTargetCreator + LODThresholdForCleaning LOD threshold above which the cleaner will clean + entropyThreshold percentage of mismatches at a locus to be considered having high entropy + out Output bam + bam_compression Compression level to use for writing BAM files + disable_bam_indexing Turn off on-the-fly creation of indices for output BAM files. 
+ simplifyBAM If provided, output BAM files will be simplified to include just key reads for downstream variation discovery analyses (removing duplicates, PF-, non-primary reads), as well as stripping all extended tags from the kept reads except the read group identifier + useOnlyKnownIndels Don't run 'Smith-Waterman' to generate alternate consensuses; use only known indels provided as RODs for constructing the alternate references. + maxReadsInMemory max reads allowed to be kept in memory at a time by the SAMFileWriter. Keep it low to minimize memory consumption (but the tool may skip realignment on regions with too much coverage. If it is too low, it may generate errors during realignment); keep it high to maximize realignment (but make sure to give Java enough memory). + maxIsizeForMovement maximum insert size of read pairs that we attempt to realign + maxPositionalMoveAllowed maximum positional move in basepairs that a read can be adjusted during realignment + maxConsensuses max alternate consensuses to try (necessary to improve performance in deep coverage) + maxReadsForConsensuses max reads used for finding the alternate consensuses (necessary to improve performance in deep coverage) + maxReadsForRealignment max reads allowed at an interval for realignment; if this value is exceeded, realignment is not attempted and the reads are passed to the output file(s) as-is + noOriginalAlignmentTags Don't output the original cigar or alignment start tags for each realigned read in the output bam. + +------ + +**Citation** + +For the underlying tool, please cite `DePristo MA, Banks E, Poplin R, Garimella KV, Maguire JR, Hartl C, Philippakis AA, del Angel G, Rivas MA, Hanna M, McKenna A, Fennell TJ, Kernytsky AM, Sivachenko AY, Cibulskis K, Gabriel SB, Altshuler D, Daly MJ. A framework for variation discovery and genotyping using next-generation DNA sequencing data. Nat Genet. 2011 May;43(5):491-8. 
<http://www.ncbi.nlm.nih.gov/pubmed/21478889>`_ + +If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* + + + diff -r b138c5569231 -r 432aafa6830a print_reads.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/print_reads.xml Wed Sep 12 17:54:46 2012 -0400 @@ -0,0 +1,425 @@ + + from BAM files + + gatk + samtools + + gatk2_wrapper.py + --max_jvm_heap_fraction "1" + --stdout "${output_log}" + #for $i, $input_bam in enumerate( $reference_source.input_bams ): + -d "-I" "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "gatk_input_${i}" + #if str( $input_bam.input_bam.metadata.bam_index ) != "None": + -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "gatk_input_${i}" ##hardcode galaxy ext type as bam_index + #end if + #end for + -p 'java + -jar "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/GenomeAnalysisTK.jar" + -T "PrintReads" + ##--num_threads 4 ##hard coded, for now + --out "${output_bam}" + -et "NO_ET" -K "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/gatk2_key_file" ##ET no phone home + #if $reference_source.reference_source_selector != "history": + -R "${reference_source.ref_file.fields.path}" + #end if + --number "${number}" + #if $platform: + --platform "${platform}" + #end if + #if $read_group: + --readGroup "${read_group}" + #end if + #for $sample_file in $sample_file_repeat: + --sample_file "${sample_file.input_sample_file}" + #end for + #for $sample_name in $sample_name_repeat: + --sample_name "${sample_name.sample_name}" + #end for + ' + + ##start standard gatk options + #if $gatk_param_type.gatk_param_type_selector == "advanced": + #for $pedigree in $gatk_param_type.pedigree: + -p '--pedigree "${pedigree.pedigree_file}"' + #end for + #for $pedigree_string in $gatk_param_type.pedigree_string_repeat: + -p '--pedigreeString "${pedigree_string.pedigree_string}"' + #end for + -p '--pedigreeValidationType "${gatk_param_type.pedigree_validation_type}"' + #for $read_filter in $gatk_param_type.read_filter: 
+ -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}" + ###raise Exception( str( dir( $read_filter ) ) ) + #for $name, $param in $read_filter.read_filter_type.iteritems(): + #if $name not in [ "__current_case__", "read_filter_type_selector" ]: + #if hasattr( $param.input, 'truevalue' ): + ${param} + #else: + --${name} "${param}" + #end if + #end if + #end for + ' + #end for + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_interval_repeat ): + -d "--intervals" "${input_intervals.input_intervals}" "${input_intervals.input_intervals.ext}" "input_intervals_${interval_count}" + #end for + + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_exclude_interval_repeat ): + -d "--excludeIntervals" "${input_intervals.input_exclude_intervals}" "${input_intervals.input_exclude_intervals.ext}" "input_exlude_intervals_${interval_count}" + #end for + + -p '--interval_set_rule "${gatk_param_type.interval_set_rule}"' + + -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"' + #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE": + -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"' + #end if + -p ' + --baq "${gatk_param_type.baq}" + --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}" + ${gatk_param_type.use_original_qualities} + --defaultBaseQualities "${gatk_param_type.default_base_qualities}" + --validation_strictness "${gatk_param_type.validation_strictness}" + --interval_merging "${gatk_param_type.interval_merging}" + ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.non_deterministic_random_seed} + ' + #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): + #if 
$rg_black_list.read_group_black_list_type.read_group_black_list_type_selector == "file": + -d "--read_group_black_list" "${rg_black_list.read_group_black_list_type.read_group_black_list}" "txt" "input_read_group_black_list_${rg_black_list_count}" + #else + -p '--read_group_black_list "${rg_black_list.read_group_black_list_type.read_group_black_list}"' + #end if + #end for + #end if + + #if $reference_source.reference_source_selector == "history": + -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input" + #end if + ##end standard gatk options + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +PrintReads can dynamically merge the contents of multiple input BAM files, resulting in merged output sorted in coordinate order. + +For more information on the GATK Print Reads Walker, see this `tool specific page <http://www.broadinstitute.org/gsa/gatkdocs/release/org_broadinstitute_sting_gatk_walkers_PrintReadsWalker.html>`_. + +To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_. + +If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_. + +------ + +**Inputs** + +GenomeAnalysisTK: PrintReads accepts one or more BAM or SAM input files. + + +**Outputs** + +The output is in BAM format. 
+ + +Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats. + +------- + +**Settings**:: + + number int -1 Print the first n reads from the file, discarding the rest + platform String NA Exclude all reads with this platform from the output + readGroup String NA Exclude all reads with this read group from the output + sample_file Set[File] [] File containing a list of samples (one per line). Can be specified multiple times + sample_name Set[String] [] Sample name to be included in the analysis. Can be specified multiple times. + +------ + +**Citation** + +For the underlying tool, please cite `DePristo MA, Banks E, Poplin R, Garimella KV, Maguire JR, Hartl C, Philippakis AA, del Angel G, Rivas MA, Hanna M, McKenna A, Fennell TJ, Kernytsky AM, Sivachenko AY, Cibulskis K, Gabriel SB, Altshuler D, Daly MJ. A framework for variation discovery and genotyping using next-generation DNA sequencing data. Nat Genet. 2011 May;43(5):491-8. <http://www.ncbi.nlm.nih.gov/pubmed/21478889>`_ + +If you use this tool in Galaxy, please cite Blankenberg D, et al. 
*In preparation.* + + + diff -r b138c5569231 -r 432aafa6830a realigner_target_creator.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/realigner_target_creator.xml Wed Sep 12 17:54:46 2012 -0400 @@ -0,0 +1,449 @@ + + for use in local realignment + + gatk + samtools + + gatk2_wrapper.py + --max_jvm_heap_fraction "1" + --stdout "${output_log}" + -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input" + #if str( $reference_source.input_bam.metadata.bam_index ) != "None": + -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index + #end if + -p 'java + -jar "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/GenomeAnalysisTK.jar" + -T "RealignerTargetCreator" + -o "${output_interval}" + -et "NO_ET" -K "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/gatk2_key_file" ##ET no phone home + --num_threads 4 ##hard coded, for now + ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout + #if $reference_source.reference_source_selector != "history": + -R "${reference_source.ref_file.fields.path}" + #end if + ' + #set $rod_binding_names = dict() + #for $rod_binding in $rod_bind: + #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom': + #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name + #else + #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector + #end if + #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1 + -d "-known:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" + #end for + + ##start standard gatk options + #if $gatk_param_type.gatk_param_type_selector == "advanced": + #for $pedigree in $gatk_param_type.pedigree: + -p '--pedigree "${pedigree.pedigree_file}"' + #end for + #for $pedigree_string in 
$gatk_param_type.pedigree_string_repeat: + -p '--pedigreeString "${pedigree_string.pedigree_string}"' + #end for + -p '--pedigreeValidationType "${gatk_param_type.pedigree_validation_type}"' + #for $read_filter in $gatk_param_type.read_filter: + -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}" + ###raise Exception( str( dir( $read_filter ) ) ) + #for $name, $param in $read_filter.read_filter_type.iteritems(): + #if $name not in [ "__current_case__", "read_filter_type_selector" ]: + #if hasattr( $param.input, 'truevalue' ): + ${param} + #else: + --${name} "${param}" + #end if + #end if + #end for + ' + #end for + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_interval_repeat ): + -d "--intervals" "${input_intervals.input_intervals}" "${input_intervals.input_intervals.ext}" "input_intervals_${interval_count}" + #end for + + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_exclude_interval_repeat ): + -d "--excludeIntervals" "${input_intervals.input_exclude_intervals}" "${input_intervals.input_exclude_intervals.ext}" "input_exlude_intervals_${interval_count}" + #end for + + -p '--interval_set_rule "${gatk_param_type.interval_set_rule}"' + + -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"' + #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE": + -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"' + #end if + -p ' + --baq "${gatk_param_type.baq}" + --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}" + ${gatk_param_type.use_original_qualities} + --defaultBaseQualities "${gatk_param_type.default_base_qualities}" + --validation_strictness "${gatk_param_type.validation_strictness}" + --interval_merging "${gatk_param_type.interval_merging}" + 
${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.non_deterministic_random_seed} + ' + #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): + #if $rg_black_list.read_group_black_list_type.read_group_black_list_type_selector == "file": + -d "--read_group_black_list" "${rg_black_list.read_group_black_list_type.read_group_black_list}" "txt" "input_read_group_black_list_${rg_black_list_count}" + #else + -p '--read_group_black_list "${rg_black_list.read_group_black_list_type.read_group_black_list}"' + #end if + #end for + #end if + + #if $reference_source.reference_source_selector == "history": + -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input" + #end if + ##end standard gatk options + ##start analysis specific options + #if $analysis_param_type.analysis_param_type_selector == "advanced": + -p ' + --minReadsAtLocus "${analysis_param_type.minReadsAtLocus}" + --windowSize "${analysis_param_type.windowSize}" + --mismatchFraction "${analysis_param_type.mismatchFraction}" + --maxIntervalSize "${analysis_param_type.maxIntervalSize}" + ' + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Emits intervals for the Local Indel Realigner to target for cleaning. Ignores 454 reads, MQ0 reads, and reads with consecutive indel operators in the CIGAR string. 
+ +For more information on local realignment around indels using the GATK, see this `tool specific page <http://www.broadinstitute.org/gsa/wiki/index.php/Local_realignment_around_indels>`_. + +To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_. + +If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_. + +------ + +**Inputs** + +GenomeAnalysisTK: RealignerTargetCreator accepts an aligned BAM input file. + + +**Outputs** + +The output is in GATK Interval format. + + +Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats. + +------- + +**Settings**:: + + windowSize window size for calculating entropy or SNP clusters + mismatchFraction fraction of base qualities needing to mismatch for a position to have high entropy; to disable set to <= 0 or > 1 + minReadsAtLocus minimum reads at a locus to enable using the entropy calculation + maxIntervalSize maximum interval size + +------ + +**Citation** + +For the underlying tool, please cite `DePristo MA, Banks E, Poplin R, Garimella KV, Maguire JR, Hartl C, Philippakis AA, del Angel G, Rivas MA, Hanna M, McKenna A, Fennell TJ, Kernytsky AM, Sivachenko AY, Cibulskis K, Gabriel SB, Altshuler D, Daly MJ. A framework for variation discovery and genotyping using next-generation DNA sequencing data. Nat Genet. 2011 May;43(5):491-8. <http://www.ncbi.nlm.nih.gov/pubmed/21478889>`_ + +If you use this tool in Galaxy, please cite Blankenberg D, et al. 
*In preparation.* + + + diff -r b138c5569231 -r 432aafa6830a table_recalibration.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/table_recalibration.xml Wed Sep 12 17:54:46 2012 -0400 @@ -0,0 +1,516 @@ + + on BAM files + + gatk + samtools + + gatk2_wrapper.py + --max_jvm_heap_fraction "1" + --stdout "${output_log}" + -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input" + #if str( $reference_source.input_bam.metadata.bam_index ) != "None": + -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index + #end if + -p 'java + -jar "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/GenomeAnalysisTK.jar" + -T "TableRecalibration" + -o "${output_bam}" + -et "NO_ET" -K "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/gatk2_key_file" ##ET no phone home + ##--num_threads 4 ##hard coded, for now + ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout + #if $reference_source.reference_source_selector != "history": + -R "${reference_source.ref_file.fields.path}" + #end if + --recal_file "${input_recal}" + --disable_bam_indexing + ' + ##start standard gatk options + #if $gatk_param_type.gatk_param_type_selector == "advanced": + #for $pedigree in $gatk_param_type.pedigree: + -p '--pedigree "${pedigree.pedigree_file}"' + #end for + #for $pedigree_string in $gatk_param_type.pedigree_string_repeat: + -p '--pedigreeString "${pedigree_string.pedigree_string}"' + #end for + -p '--pedigreeValidationType "${gatk_param_type.pedigree_validation_type}"' + #for $read_filter in $gatk_param_type.read_filter: + -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}" + ###raise Exception( str( dir( $read_filter ) ) ) + #for $name, $param in $read_filter.read_filter_type.iteritems(): + #if $name not in [ "__current_case__", "read_filter_type_selector" ]: + #if hasattr( $param.input, 'truevalue' ): + ${param} + #else: + --${name} 
"${param}" + #end if + #end if + #end for + ' + #end for + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_interval_repeat ): + -d "--intervals" "${input_intervals.input_intervals}" "${input_intervals.input_intervals.ext}" "input_intervals_${interval_count}" + #end for + + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_exclude_interval_repeat ): + -d "--excludeIntervals" "${input_intervals.input_exclude_intervals}" "${input_intervals.input_exclude_intervals.ext}" "input_exlude_intervals_${interval_count}" + #end for + + -p '--interval_set_rule "${gatk_param_type.interval_set_rule}"' + + -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"' + #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE": + -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"' + #end if + -p ' + --baq "${gatk_param_type.baq}" + --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}" + ${gatk_param_type.use_original_qualities} + --defaultBaseQualities "${gatk_param_type.default_base_qualities}" + --validation_strictness "${gatk_param_type.validation_strictness}" + --interval_merging "${gatk_param_type.interval_merging}" + ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.non_deterministic_random_seed} + ' + #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): + #if $rg_black_list.read_group_black_list_type.read_group_black_list_type_selector == "file": + -d "--read_group_black_list" "${rg_black_list.read_group_black_list_type.read_group_black_list}" "txt" "input_read_group_black_list_${rg_black_list_count}" + #else + -p '--read_group_black_list "${rg_black_list.read_group_black_list_type.read_group_black_list}"' + #end if + #end for + #end if + + #if str( 
$reference_source.reference_source_selector ) == "history": + -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input" + #end if + ##end standard gatk options + + ##start analysis specific options + #if $analysis_param_type.analysis_param_type_selector == "advanced": + -p ' + #if $analysis_param_type.default_read_group_type.default_read_group_type_selector == "set": + --default_read_group "${analysis_param_type.default_read_group_type.default_read_group}" + #end if + #if str( $analysis_param_type.default_platform ) != "default": + --default_platform "${analysis_param_type.default_platform}" + #end if + #if str( $analysis_param_type.force_read_group_type.force_read_group_type_selector ) == "set": + --force_read_group "${analysis_param_type.force_read_group_type.force_read_group}" + #end if + #if str( $analysis_param_type.force_platform ) != "default": + --force_platform "${analysis_param_type.force_platform}" + #end if + ${analysis_param_type.exception_if_no_tile} + #if str( $analysis_param_type.solid_options_type.solid_options_type_selector ) == "set": + #if str( $analysis_param_type.solid_options_type.solid_recal_mode ) != "default": + --solid_recal_mode "${analysis_param_type.solid_options_type.solid_recal_mode}" + #end if + #if str( $analysis_param_type.solid_options_type.solid_nocall_strategy ) != "default": + --solid_nocall_strategy "${analysis_param_type.solid_options_type.solid_nocall_strategy}" + #end if + #end if + ${analysis_param_type.simplify_bam} + --preserve_qscores_less_than "${analysis_param_type.preserve_qscores_less_than}" + --smoothing "${analysis_param_type.smoothing}" + --max_quality_score "${analysis_param_type.max_quality_score}" + --window_size_nqs "${analysis_param_type.window_size_nqs}" + --homopolymer_nback "${analysis_param_type.homopolymer_nback}" + ${analysis_param_type.do_not_write_original_quals} + ' + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +This walker is designed to work as the second pass in a two-pass processing step, doing a by-read traversal. For each base in each read this walker calculates various user-specified covariates (such as read group, reported quality score, cycle, and dinuc) Using these values as a key in a large hashmap the walker calculates an empirical base quality score and overwrites the quality score currently in the read. This walker then outputs a new bam file with these updated (recalibrated) reads. Note: This walker expects as input the recalibration table file generated previously by CovariateCounterWalker. Note: This walker is designed to be used in conjunction with CovariateCounterWalker. + +For more information on base quality score recalibration using the GATK, see this `tool specific page <http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration>`_. + +To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_. + +If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_. + +------ + +**Inputs** + +GenomeAnalysisTK: TableRecalibration accepts an aligned BAM and a recalibration CSV input files. + + +**Outputs** + +The output is in BAM format. 
+ + +Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats. + +------- + +**Settings**:: + + default_read_group If a read has no read group then default to the provided String. + default_platform If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid. + force_read_group If provided, the read group ID of EVERY read will be forced to be the provided String. This is useful to collapse all data into a single read group. + force_platform If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid. + window_size_nqs The window size used by MinimumNQSCovariate for its calculation + homopolymer_nback The number of previous bases to look at in HomopolymerCovariate + exception_if_no_tile If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1 + solid_recal_mode How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS (DO_NOTHING|SET_Q_ZERO|SET_Q_ZERO_BASE_N|REMOVE_REF_BIAS) + solid_nocall_strategy Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ (THROW_EXCEPTION|LEAVE_READ_UNRECALIBRATED|PURGE_READ) + recal_file Filename for the input covariates table recalibration .csv file + out The output BAM file + bam_compression Compression level to use for writing BAM files + disable_bam_indexing Turn off on-the-fly creation of indices for output BAM files. 
+ simplifyBAM If provided, output BAM files will be simplified to include just key reads for downstream variation discovery analyses (removing duplicates, PF-, non-primary reads), as well as stripping all extended tags from the kept reads except the read group identifier + preserve_qscores_less_than Bases with quality scores less than this threshold won't be recalibrated, default=5. In general it's unsafe to change quality scores below 5, since base callers use these values to indicate random or bad bases + smoothing Number of imaginary counts to add to each bin in order to smooth out bins with few data points, default=1 + max_quality_score The integer value at which to cap the quality scores, default=50 + doNotWriteOriginalQuals If true, we will not write the original quality (OQ) tag for each read + +------ + +**Citation** + +For the underlying tool, please cite `DePristo MA, Banks E, Poplin R, Garimella KV, Maguire JR, Hartl C, Philippakis AA, del Angel G, Rivas MA, Hanna M, McKenna A, Fennell TJ, Kernytsky AM, Sivachenko AY, Cibulskis K, Gabriel SB, Altshuler D, Daly MJ. A framework for variation discovery and genotyping using next-generation DNA sequencing data. Nat Genet. 2011 May;43(5):491-8. <http://www.ncbi.nlm.nih.gov/pubmed/21478889>`_ + +If you use this tool in Galaxy, please cite Blankenberg D, et al. 
*In preparation.* + + + diff -r b138c5569231 -r 432aafa6830a unified_genotyper.xml --- a/unified_genotyper.xml Wed Sep 12 12:38:51 2012 -0400 +++ b/unified_genotyper.xml Wed Sep 12 17:54:46 2012 -0400 @@ -1,7 +1,7 @@ SNP and indel caller - gatk2 + gatk samtools gatk2_wrapper.py @@ -14,12 +14,12 @@ #end if #end for -p 'java - -jar "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk2/GenomeAnalysisTK.jar" + -jar "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/GenomeAnalysisTK.jar" -T "UnifiedGenotyper" --num_threads 4 ##hard coded, for now --out "${output_vcf}" --metrics_file "${output_metrics}" - -et "NO_ET" -K "${GALAXY_DATA_INDEX_DIR}/shared/jars/gatk2/gatk2_key_file" ##ET no phone home + -et "NO_ET" -K "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/gatk2_key_file" ##ET no phone home ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout #if $reference_source.reference_source_selector != "history": -R "${reference_source.ref_file.fields.path}" diff -r b138c5569231 -r 432aafa6830a variant_annotator.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_annotator.xml Wed Sep 12 17:54:46 2012 -0400 @@ -0,0 +1,540 @@ + + + + gatk + samtools + + gatk2_wrapper.py + --max_jvm_heap_fraction "1" + --stdout "${output_log}" + #if str( $reference_source.input_bam ) != "None": + -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input" + #if str( $reference_source.input_bam.metadata.bam_index ) != "None": + -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index + #end if + #end if + -d "--variant" "${reference_source.input_variant}" "${reference_source.input_variant.ext}" "input_variant" + -p 'java + -jar "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/GenomeAnalysisTK.jar" + ##--list + -T "VariantAnnotator" + ##--num_threads 4 ##hard coded, for now + -et "NO_ET" -K "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/gatk2_key_file" ##ET no 
phone home + ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout + #if $reference_source.reference_source_selector != "history": + -R "${reference_source.ref_file.fields.path}" + #end if + -o "${output_vcf}" + #if str( $annotations_type.annotations_type_selector ) == "use_all_annotations": + --useAllAnnotations + #else: + #if $annotations_type.annotations: + #for $annotation in str( $annotations_type.annotations.fields.gatk_value ).split( ',' ): + --annotation "${annotation}" + #end for + #end if + #end if + #if $exclude_annotations: + #for $annotation in str( $exclude_annotations.fields.gatk_value ).split( ',' ): + --excludeAnnotation "${annotation}" + #end for + #end if + #for $additional_annotation in $additional_annotations: + --annotation "${additional_annotation.additional_annotation_name}" + #end for + ' + #if $reference_source.input_variant_bti: + -d "--intervals" "${reference_source.input_variant}" "${reference_source.input_variant.ext}" "input_variant_bti" + #end if + + #for $rod_binding in $comp_rod_bind: + -d "--comp:${rod_binding.comp_rod_name},%(file_type)s" "${rod_binding.comp_input_rod}" "${rod_binding.comp_input_rod.ext}" "input_comp_${rod_binding.comp_rod_name}" + #end for + + #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp': + -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}" + #end if + + + #for $rod_binding in $resource_rod_bind: + -d "--resource:${rod_binding.resource_rod_name},%(file_type)s" "${rod_binding.resource_input_rod}" "${rod_binding.resource_input_rod.ext}" "input_resource_${rod_binding.resource_rod_name}" + #end for + + #if str( $snpEff_rod_bind_type.snpEff_rod_bind_type_selector ) == 'set_snpEff': + -p '--annotation "SnpEff"' + -d "--snpEffFile:${snpEff_rod_bind_type.snpEff_rod_name},%(file_type)s" 
"${snpEff_rod_bind_type.snpEff_input_rod}" "${snpEff_rod_bind_type.snpEff_input_rod.ext}" "input_snpEff_${snpEff_rod_bind_type.snpEff_rod_name}" + #else: + -p '--excludeAnnotation "SnpEff"' + #end if + + #for $expression in $expressions: + -p '--expression "${expression.expression}"' + #end for + + ##start standard gatk options + #if $gatk_param_type.gatk_param_type_selector == "advanced": + #for $pedigree in $gatk_param_type.pedigree: + -p '--pedigree "${pedigree.pedigree_file}"' + #end for + #for $pedigree_string in $gatk_param_type.pedigree_string_repeat: + -p '--pedigreeString "${pedigree_string.pedigree_string}"' + #end for + -p '--pedigreeValidationType "${gatk_param_type.pedigree_validation_type}"' + #for $read_filter in $gatk_param_type.read_filter: + -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}" + ###raise Exception( str( dir( $read_filter ) ) ) + #for $name, $param in $read_filter.read_filter_type.iteritems(): + #if $name not in [ "__current_case__", "read_filter_type_selector" ]: + #if hasattr( $param.input, 'truevalue' ): + ${param} + #else: + --${name} "${param}" + #end if + #end if + #end for + ' + #end for + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_interval_repeat ): + -d "--intervals" "${input_intervals.input_intervals}" "${input_intervals.input_intervals.ext}" "input_intervals_${interval_count}" + #end for + + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_exclude_interval_repeat ): + -d "--excludeIntervals" "${input_intervals.input_exclude_intervals}" "${input_intervals.input_exclude_intervals.ext}" "input_exlude_intervals_${interval_count}" + #end for + + -p '--interval_set_rule "${gatk_param_type.interval_set_rule}"' + + -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"' + #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE": + -p 
'--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"' + #end if + -p ' + --baq "${gatk_param_type.baq}" + --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}" + ${gatk_param_type.use_original_qualities} + --defaultBaseQualities "${gatk_param_type.default_base_qualities}" + --validation_strictness "${gatk_param_type.validation_strictness}" + --interval_merging "${gatk_param_type.interval_merging}" + ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.non_deterministic_random_seed} + ' + #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): + #if $rg_black_list.read_group_black_list_type.read_group_black_list_type_selector == "file": + -d "--read_group_black_list" "${rg_black_list.read_group_black_list_type.read_group_black_list}" "txt" "input_read_group_black_list_${rg_black_list_count}" + #else + -p '--read_group_black_list "${rg_black_list.read_group_black_list_type.read_group_black_list}"' + #end if + #end for + #end if + #if str( $reference_source.reference_source_selector ) == "history": + -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input" + #end if + ##end standard gatk options + + -p ' + #if str( $annotation_group ) != "None": + #for $group in str( $annotation_group ).split( ',' ): + --group "${group}" + #end for + #end if + #if str( $family_string ) != "": + --family_string "${family_string}" + #end if + --MendelViolationGenotypeQualityThreshold "${mendel_violation_genotype_quality_threshold}" + ' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Annotates variant calls with context information. Users can specify which of the available annotations to use. + +For more information on using the VariantAnnotator, see this `tool specific page <http://www.broadinstitute.org/gsa/wiki/index.php/VariantAnnotator>`_. + +To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_. + +If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_. + +------ + + +**Inputs** + +GenomeAnalysisTK: VariantAnnotator accepts a variant input file. + + +**Outputs** + +The output is in VCF format. + + +Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats. + +------- + +**Settings**:: + + + sampleName The sample (NA-ID) corresponding to the variant input (for non-VCF input only) + annotation One or more specific annotations to apply to variant calls + group One or more classes/groups of annotations to apply to variant calls + expression One or more specific expressions to apply to variant calls; see documentation for more details + useAllAnnotations Use all possible annotations (not for the faint of heart) + +------ + +**Citation** + +For the underlying tool, please cite `DePristo MA, Banks E, Poplin R, Garimella KV, Maguire JR, Hartl C, Philippakis AA, del Angel G, Rivas MA, Hanna M, McKenna A, Fennell TJ, Kernytsky AM, Sivachenko AY, Cibulskis K, Gabriel SB, Altshuler D, Daly MJ. 
A framework for variation discovery and genotyping using next-generation DNA sequencing data. Nat Genet. 2011 May;43(5):491-8. <http://www.ncbi.nlm.nih.gov/pubmed/21478889>`_ + +If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* + + + diff -r b138c5569231 -r 432aafa6830a variant_apply_recalibration.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_apply_recalibration.xml Wed Sep 12 17:54:46 2012 -0400 @@ -0,0 +1,414 @@ + + + + gatk2 + + gatk2_wrapper.py + --max_jvm_heap_fraction "1" + --stdout "${output_log}" + #for $var_count, $variant in enumerate( $reference_source.variants ): + -d "--input:input_${var_count},%(file_type)s" "${variant.input_variants}" "${variant.input_variants.ext}" "input_variants_${var_count}" + #end for + -p 'java + -jar "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/GenomeAnalysisTK.jar" + -T "ApplyRecalibration" + ##--num_threads 4 ##hard coded, for now + -et "NO_ET" -K "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/gatk2_key_file" ##ET no phone home + #if $reference_source.reference_source_selector != "history": + -R "${reference_source.ref_file.fields.path}" + #end if + --recal_file "${reference_source.input_recal}" + --tranches_file "${reference_source.input_tranches}" + --out "${output_variants}" + ' + + ##start standard gatk options + #if $gatk_param_type.gatk_param_type_selector == "advanced": + #for $pedigree in $gatk_param_type.pedigree: + -p '--pedigree "${pedigree.pedigree_file}"' + #end for + #for $pedigree_string in $gatk_param_type.pedigree_string_repeat: + -p '--pedigreeString "${pedigree_string.pedigree_string}"' + #end for + -p '--pedigreeValidationType "${gatk_param_type.pedigree_validation_type}"' + #for $read_filter in $gatk_param_type.read_filter: + -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}" + ###raise Exception( str( dir( $read_filter ) ) ) + #for $name, $param in $read_filter.read_filter_type.iteritems(): + #if $name not in [ 
"__current_case__", "read_filter_type_selector" ]: + #if hasattr( $param.input, 'truevalue' ): + ${param} + #else: + --${name} "${param}" + #end if + #end if + #end for + ' + #end for + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_interval_repeat ): + -d "--intervals" "${input_intervals.input_intervals}" "${input_intervals.input_intervals.ext}" "input_intervals_${interval_count}" + #end for + + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_exclude_interval_repeat ): + -d "--excludeIntervals" "${input_intervals.input_exclude_intervals}" "${input_intervals.input_exclude_intervals.ext}" "input_exlude_intervals_${interval_count}" + #end for + + -p '--interval_set_rule "${gatk_param_type.interval_set_rule}"' + + -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"' + #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE": + -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"' + #end if + -p ' + --baq "${gatk_param_type.baq}" + --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}" + ${gatk_param_type.use_original_qualities} + --defaultBaseQualities "${gatk_param_type.default_base_qualities}" + --validation_strictness "${gatk_param_type.validation_strictness}" + --interval_merging "${gatk_param_type.interval_merging}" + ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.non_deterministic_random_seed} + ' + #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): + #if $rg_black_list.read_group_black_list_type.read_group_black_list_type_selector == "file": + -d "--read_group_black_list" "${rg_black_list.read_group_black_list_type.read_group_black_list}" "txt" "input_read_group_black_list_${rg_black_list_count}" + #else + -p '--read_group_black_list 
"${rg_black_list.read_group_black_list_type.read_group_black_list}"' + #end if + #end for + #end if + + #if str( $reference_source.reference_source_selector ) == "history": + -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input" + #end if + ##end standard gatk options + + ##start analysis specific options + -p ' + --mode "${mode}" + + #for $ignore_filter in $ignore_filters: + #set $ignore_filter_name = str( $ignore_filter.ignore_filter_type.ignore_filter_type_selector ) + #if $ignore_filter_name == "custom": + #set $ignore_filter_name = str( $ignore_filter.ignore_filter_type.filter_name ) + #end if + --ignore_filter "${ignore_filter_name}" + #end for + --ts_filter_level "${ts_filter_level}" + ' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Applies cuts to the input vcf file (by adding filter lines) to achieve the desired novel FDR levels which were specified during VariantRecalibration + +For more information on using the ApplyRecalibration module, see this `tool specific page <http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration>`_. + +To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_. + +If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_. 
+ +------ + +**Inputs** + +GenomeAnalysisTK: ApplyRecalibration accepts a variant input file, a recalibration file and a tranches file. + + +**Outputs** + +The output is in VCF format. + + +Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats. + +------- + +**Settings**:: + + + recal_file The input recal file used by ApplyRecalibration + tranches_file The input tranches file describing where to cut the data + out The output filtered, recalibrated VCF file + ts_filter_level The truth sensitivity level at which to start filtering + ignore_filter If specified the optimizer will use variants even if the specified filter name is marked in the input VCF file + mode Recalibration mode to employ: 1.) SNP for recalibrating only SNPs (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both SNPs and indels simultaneously. (SNP|INDEL|BOTH) + +------ + +**Citation** + +For the underlying tool, please cite `DePristo MA, Banks E, Poplin R, Garimella KV, Maguire JR, Hartl C, Philippakis AA, del Angel G, Rivas MA, Hanna M, McKenna A, Fennell TJ, Kernytsky AM, Sivachenko AY, Cibulskis K, Gabriel SB, Altshuler D, Daly MJ. A framework for variation discovery and genotyping using next-generation DNA sequencing data. Nat Genet. 2011 May;43(5):491-8. <http://www.ncbi.nlm.nih.gov/pubmed/21478889>`_ + +If you use this tool in Galaxy, please cite Blankenberg D, et al. 
*In preparation.* + + + diff -r b138c5569231 -r 432aafa6830a variant_combine.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_combine.xml Wed Sep 12 17:54:46 2012 -0400 @@ -0,0 +1,455 @@ + + + + gatk + + gatk2_wrapper.py + --max_jvm_heap_fraction "1" + --stdout "${output_log}" + + #set $priority_order = [] + #for $input_variant in $reference_source.input_variants: + -d "--variant:${input_variant.input_variant_name},%(file_type)s" "${input_variant.input_variant}" "${input_variant.input_variant.ext}" "input_variant_${input_variant.input_variant_name}" + #set $input_variant_name = str( $input_variant.input_variant_name ) + #assert $input_variant_name not in $priority_order, "Variant Names must be unique" ##this should be handled by a validator + #silent $priority_order.append( $input_variant_name ) + #end for + -p 'java + -jar "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/GenomeAnalysisTK.jar" + -T "CombineVariants" + --out "${output_variants}" + ##--num_threads 4 ##hard coded, for now + -et "NO_ET" -K "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/gatk2_key_file" ##ET no phone home + ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout + #if $reference_source.reference_source_selector != "history": + -R "${reference_source.ref_file.fields.path}" + #end if + --genotypemergeoption "${genotype_merge_option}" + --rod_priority_list "${ ','.join( $priority_order ) }" + ' + + ##start standard gatk options + #if $gatk_param_type.gatk_param_type_selector == "advanced": + #for $pedigree in $gatk_param_type.pedigree: + -p '--pedigree "${pedigree.pedigree_file}"' + #end for + #for $pedigree_string in $gatk_param_type.pedigree_string_repeat: + -p '--pedigreeString "${pedigree_string.pedigree_string}"' + #end for + -p '--pedigreeValidationType "${gatk_param_type.pedigree_validation_type}"' + #for $read_filter in $gatk_param_type.read_filter: + -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}" + 
###raise Exception( str( dir( $read_filter ) ) ) + #for $name, $param in $read_filter.read_filter_type.iteritems(): + #if $name not in [ "__current_case__", "read_filter_type_selector" ]: + #if hasattr( $param.input, 'truevalue' ): + ${param} + #else: + --${name} "${param}" + #end if + #end if + #end for + ' + #end for + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_interval_repeat ): + -d "--intervals" "${input_intervals.input_intervals}" "${input_intervals.input_intervals.ext}" "input_intervals_${interval_count}" + #end for + + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_exclude_interval_repeat ): + -d "--excludeIntervals" "${input_intervals.input_exclude_intervals}" "${input_intervals.input_exclude_intervals.ext}" "input_exlude_intervals_${interval_count}" + #end for + + -p '--interval_set_rule "${gatk_param_type.interval_set_rule}"' + + -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"' + #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE": + -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"' + #end if + -p ' + --baq "${gatk_param_type.baq}" + --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}" + ${gatk_param_type.use_original_qualities} + --defaultBaseQualities "${gatk_param_type.default_base_qualities}" + --validation_strictness "${gatk_param_type.validation_strictness}" + --interval_merging "${gatk_param_type.interval_merging}" + ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.non_deterministic_random_seed} + ' + #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): + #if $rg_black_list.read_group_black_list_type.read_group_black_list_type_selector == "file": + -d "--read_group_black_list" 
"${rg_black_list.read_group_black_list_type.read_group_black_list}" "txt" "input_read_group_black_list_${rg_black_list_count}" + #else + -p '--read_group_black_list "${rg_black_list.read_group_black_list_type.read_group_black_list}"' + #end if + #end for + #end if + + #if $reference_source.reference_source_selector == "history": + -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input" + #end if + ##end standard gatk options + + + ##start analysis specific options + #if $analysis_param_type.analysis_param_type_selector == "advanced": + -p ' + --filteredrecordsmergetype "${analysis_param_type.filtered_records_merge_type}" + ${analysis_param_type.print_complex_merges} + ${analysis_param_type.filtered_are_uncalled} + ${analysis_param_type.minimal_vcf} + ${analysis_param_type.assume_identical_samples} + + #if str( $analysis_param_type.set_key ): + --setKey "${analysis_param_type.set_key}" + #end if + + --minimumN "${analysis_param_type.minimum_n}" + ' + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Combines VCF records from different sources; supports both full merges and set unions. Merge: combines multiple records into a single one; if sample names overlap then they are uniquified. Union: assumes each rod represents the same set of samples (although this is not enforced); using the priority list (if provided), emits a single record instance at every position represented in the rods. + +For more information on using the CombineVariants module, see this `tool specific page <http://www.broadinstitute.org/gsa/wiki/index.php/CombineVariants>`_. + +To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_. 
+ +If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_. + +------ + +**Inputs** + +GenomeAnalysisTK: CombineVariants accepts variant files as input. + +------ + +**Outputs** + +The output is a combined vcf file. + + +Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats. + +------- + +**Settings**:: + + out File to which variants should be written + genotypemergeoption How should we merge genotype records for samples shared across the ROD files? (UNIQUIFY|PRIORITIZE|UNSORTED|REQUIRE_UNIQUE) + filteredrecordsmergetype How should we deal with records seen at the same site in the VCF, but with different FILTER fields? KEEP_IF_ANY_UNFILTERED PASSes the record if any record is unfiltered, KEEP_IF_ALL_UNFILTERED requires all records to be unfiltered (KEEP_IF_ANY_UNFILTERED|KEEP_IF_ALL_UNFILTERED) + rod_priority_list When taking the union of variants containing genotypes: a comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted; a complete priority list MUST be provided + printComplexMerges Print out interesting sites requiring complex compatibility merging + filteredAreUncalled If true, then filtered VCFs are treated as uncalled, so that filtered set annotations don't appear in the combined VCF + minimalVCF If true, then the output VCF will contain no INFO or genotype INFO field + setKey Key, by default set, in the INFO key=value tag emitted describing which set the combined VCF record came from. Set to null if you don't want the set field emitted. + assumeIdenticalSamples If true, assume input VCFs have identical sample sets and disjoint calls so that one can simply perform a merge sort to combine the VCFs into one, drastically reducing the runtime. + minimumN Combine variants and output site only if variant is present in at least N input files. 
+ +------ + +**Citation** + +For the underlying tool, please cite `DePristo MA, Banks E, Poplin R, Garimella KV, Maguire JR, Hartl C, Philippakis AA, del Angel G, Rivas MA, Hanna M, McKenna A, Fennell TJ, Kernytsky AM, Sivachenko AY, Cibulskis K, Gabriel SB, Altshuler D, Daly MJ. A framework for variation discovery and genotyping using next-generation DNA sequencing data. Nat Genet. 2011 May;43(5):491-8. <http://www.ncbi.nlm.nih.gov/pubmed/21478889>`_ + +If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* + + + diff -r b138c5569231 -r 432aafa6830a variant_eval.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_eval.xml Wed Sep 12 17:54:46 2012 -0400 @@ -0,0 +1,572 @@ + + + + gatk + + gatk2_wrapper.py + #from binascii import hexlify + --max_jvm_heap_fraction "1" + --stdout "${output_log}" + #for $var_count, $variant in enumerate( $reference_source.variants ): + -d "--eval:input_${var_count},%(file_type)s" "${variant.input_variant}" "${variant.input_variant.ext}" "input_variants_${var_count}" + #end for + -p 'java + -jar "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/GenomeAnalysisTK.jar" + -T "VariantEval" + --out "${output_report}" + --num_threads 4 ##hard coded, for now + -et "NO_ET" -K "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/gatk2_key_file" ##ET no phone home + ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout + #if $reference_source.reference_source_selector != "history": + -R "${reference_source.ref_file.fields.path}" + #end if + ' + + #for $rod_binding in $comp_rod_bind: + -d "--comp:${rod_binding.comp_rod_name},%(file_type)s" "${rod_binding.comp_input_rod}" "${rod_binding.comp_input_rod.ext}" "input_comp_${rod_binding.comp_rod_name}" + #if str( $rod_binding.comp_known_names ): + -p '--known_names "${rod_binding.comp_rod_name}"' + #end if + #end for + + #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp': + -d 
"--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}" + #if str( $dbsnp_rod_bind_type.dbsnp_known_names ): + -p '--known_names "${dbsnp_rod_bind_type.dbsnp_rod_name}"' + #end if + #end if + + ##start standard gatk options + #if $gatk_param_type.gatk_param_type_selector == "advanced": + #for $pedigree in $gatk_param_type.pedigree: + -p '--pedigree "${pedigree.pedigree_file}"' + #end for + #for $pedigree_string in $gatk_param_type.pedigree_string_repeat: + -p '--pedigreeString "${pedigree_string.pedigree_string}"' + #end for + -p '--pedigreeValidationType "${gatk_param_type.pedigree_validation_type}"' + #for $read_filter in $gatk_param_type.read_filter: + -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}" + ###raise Exception( str( dir( $read_filter ) ) ) + #for $name, $param in $read_filter.read_filter_type.iteritems(): + #if $name not in [ "__current_case__", "read_filter_type_selector" ]: + #if hasattr( $param.input, 'truevalue' ): + ${param} + #else: + --${name} "${param}" + #end if + #end if + #end for + ' + #end for + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_interval_repeat ): + -d "--intervals" "${input_intervals.input_intervals}" "${input_intervals.input_intervals.ext}" "input_intervals_${interval_count}" + #end for + + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_exclude_interval_repeat ): + -d "--excludeIntervals" "${input_intervals.input_exclude_intervals}" "${input_intervals.input_exclude_intervals.ext}" "input_exlude_intervals_${interval_count}" + #end for + + -p '--interval_set_rule "${gatk_param_type.interval_set_rule}"' + + -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"' + #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE": + -p 
'--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"' + #end if + -p ' + --baq "${gatk_param_type.baq}" + --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}" + ${gatk_param_type.use_original_qualities} + --defaultBaseQualities "${gatk_param_type.default_base_qualities}" + --validation_strictness "${gatk_param_type.validation_strictness}" + --interval_merging "${gatk_param_type.interval_merging}" + ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.non_deterministic_random_seed} + ' + #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): + #if $rg_black_list.read_group_black_list_type.read_group_black_list_type_selector == "file": + -d "--read_group_black_list" "${rg_black_list.read_group_black_list_type.read_group_black_list}" "txt" "input_read_group_black_list_${rg_black_list_count}" + #else + -p '--read_group_black_list "${rg_black_list.read_group_black_list_type.read_group_black_list}"' + #end if + #end for + #end if + + #if $reference_source.reference_source_selector == "history": + -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input" + #end if + ##end standard gatk options + + + ##start analysis specific options + #if $analysis_param_type.analysis_param_type_selector == "advanced": + #for $stratification in $analysis_param_type.stratifications: + #set $select_string = "--select_exps '%s' --select_names '%s'" % ( str( $stratification.select_exps ), str( $stratification.select_name ) ) + -o '${ hexlify( $select_string ) }' + #end for + -p ' + + #for $sample in $analysis_param_type.samples: + --sample "${sample.sample}" + #end for + + #if str( $analysis_param_type.stratification_modules ) != "None": + #for $stratification_module in str( $analysis_param_type.stratification_modules).split( ',' ): + --stratificationModule 
"${stratification_module}" + #end for + #end if + + ${analysis_param_type.do_not_use_all_standard_stratifications} + + #for $variant_type in $analysis_param_type.only_variants_of_type: + --onlyVariantsOfType "${variant_type.variant_type}" + #end for + + #if str( $analysis_param_type.eval_modules ) != "None": + #for $eval_module in str( $analysis_param_type.eval_modules).split( ',' ): + --evalModule "${eval_module}" + #end for + #end if + + ${analysis_param_type.do_not_use_all_standard_modules} + + #if str( $analysis_param_type.num_samples ) != "0": + --numSamples "${analysis_param_type.num_samples}" + #end if + + --minPhaseQuality "${analysis_param_type.min_phase_quality}" + + #if str( $analysis_param_type.family ): + --family_structure "${analysis_param_type.family}" + #end if + + --mendelianViolationQualThreshold "${analysis_param_type.mendelian_violation_qual_threshold}" + + #if str( $analysis_param_type.ancestral_alignments ) != "None": + --ancestralAlignments "${analysis_param_type.ancestral_alignments}" + #end if + ' + #if str( $analysis_param_type.known_cnvs ) != "None": + -d "--knownCNVs" "${analysis_param_type.known_cnvs}" "${analysis_param_type.known_cnvs.ext}" "input_known_cnvs" + #end if + + #if str( $analysis_param_type.strat_intervals ) != "None": + -d "--stratIntervals" "${analysis_param_type.strat_intervals}" "${analysis_param_type.strat_intervals.ext}" "input_strat_intervals" + #end if + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ti/Tv ratios, and a lot more) + +For more information on using the VariantEval module, see this `tool specific page <http://www.broadinstitute.org/gsa/wiki/index.php/VariantEval>`_. + +To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_. + +If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_. + +------ + +**Inputs** + +GenomeAnalysisTK: VariantEval accepts variant files as input. + + +**Outputs** + +The output is a table of variant evaluation. + + +Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats. + + +------- + +**Settings**:: + + out An output file presented to the walker. Will overwrite contents if file exists. 
+ list List the available eval modules and exit + select_exps One or more stratifications to use when evaluating the data + select_names Names to use for the list of stratifications (must be a 1-to-1 mapping) + sample Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context + known_names Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets + stratificationModule One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless -noS is specified) + doNotUseAllStandardStratifications Do not use the standard stratification modules by default (instead, only those that are specified with the -S option) + onlyVariantsOfType If provided, only variants of these types will be considered during the evaluation, in + evalModule One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless -noE is specified) + doNotUseAllStandardModules Do not use the standard modules by default (instead, only those that are specified with the -E option) + numSamples Number of samples (used if no samples are available in the VCF file) + minPhaseQuality Minimum phasing quality + family_structure If provided, genotypes in will be examined for mendelian violations: this argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined + mendelianViolationQualThreshold Minimum genotype QUAL score for each trio member required to accept a site as a violation + ancestralAlignments Fasta file with ancestral alleles + +------ + +**Citation** + +For the underlying tool, please cite `DePristo MA, Banks E, Poplin R, Garimella KV, Maguire JR, Hartl C, Philippakis AA, del Angel G, Rivas MA, Hanna M, McKenna A, Fennell TJ, Kernytsky AM, Sivachenko AY, Cibulskis K, Gabriel SB, Altshuler D, Daly MJ. 
A framework for variation discovery and genotyping using next-generation DNA sequencing data. Nat Genet. 2011 May;43(5):491-8. <http://www.ncbi.nlm.nih.gov/pubmed/21478889>`_ + +If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* + + + diff -r b138c5569231 -r 432aafa6830a variant_filtration.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_filtration.xml Wed Sep 12 17:54:46 2012 -0400 @@ -0,0 +1,457 @@ + + on VCF files + + gatk + + gatk2_wrapper.py + #from binascii import hexlify + --max_jvm_heap_fraction "1" + --stdout "${output_log}" + -d "--variant:variant,%(file_type)s" "${reference_source.input_variant}" "${reference_source.input_variant.ext}" "input_variant" + -p 'java + -jar "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/GenomeAnalysisTK.jar" + -T "VariantFiltration" + ##--num_threads 4 ##hard coded, for now + -et "NO_ET" -K "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/gatk2_key_file" ##ET no phone home + -o "${output_vcf}" + ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout + #if $reference_source.reference_source_selector != "history": + -R "${reference_source.ref_file.fields.path}" + #end if + ' + #for $variant_filter in $variant_filters: + #set $variant_filter = "--%sExpression '%s' --%sName '%s'" % ( str( $variant_filter.is_genotype_filter ), str( $variant_filter.filter_expression ), str( $variant_filter.is_genotype_filter ), str( $variant_filter.filter_name ) ) + -o '${ hexlify( $variant_filter ) }' + #end for + + #if str( $mask_rod_bind_type.mask_rod_bind_type_selector ) == 'set_mask': + -d "--mask:${mask_rod_bind_type.mask_rod_name},%(file_type)s" "${mask_rod_bind_type.input_mask_rod}" "${mask_rod_bind_type.input_mask_rod.ext}" "input_mask_${mask_rod_bind_type.mask_rod_name}" + -p ' + --maskExtension "${mask_rod_bind_type.mask_extension}" + --maskName "${mask_rod_bind_type.mask_rod_name}" + ' + #end if + + ##start standard gatk options + #if 
$gatk_param_type.gatk_param_type_selector == "advanced": + #for $pedigree in $gatk_param_type.pedigree: + -p '--pedigree "${pedigree.pedigree_file}"' + #end for + #for $pedigree_string in $gatk_param_type.pedigree_string_repeat: + -p '--pedigreeString "${pedigree_string.pedigree_string}"' + #end for + -p '--pedigreeValidationType "${gatk_param_type.pedigree_validation_type}"' + #for $read_filter in $gatk_param_type.read_filter: + -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}" + ###raise Exception( str( dir( $read_filter ) ) ) + #for $name, $param in $read_filter.read_filter_type.iteritems(): + #if $name not in [ "__current_case__", "read_filter_type_selector" ]: + #if hasattr( $param.input, 'truevalue' ): + ${param} + #else: + --${name} "${param}" + #end if + #end if + #end for + ' + #end for + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_interval_repeat ): + -d "--intervals" "${input_intervals.input_intervals}" "${input_intervals.input_intervals.ext}" "input_intervals_${interval_count}" + #end for + + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_exclude_interval_repeat ): + -d "--excludeIntervals" "${input_intervals.input_exclude_intervals}" "${input_intervals.input_exclude_intervals.ext}" "input_exlude_intervals_${interval_count}" + #end for + + -p '--interval_set_rule "${gatk_param_type.interval_set_rule}"' + + -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"' + #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE": + -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"' + #end if + -p ' + --baq "${gatk_param_type.baq}" + --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}" + ${gatk_param_type.use_original_qualities} + --defaultBaseQualities 
"${gatk_param_type.default_base_qualities}" + --validation_strictness "${gatk_param_type.validation_strictness}" + --interval_merging "${gatk_param_type.interval_merging}" + ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.non_deterministic_random_seed} + ' + #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): + #if $rg_black_list.read_group_black_list_type.read_group_black_list_type_selector == "file": + -d "--read_group_black_list" "${rg_black_list.read_group_black_list_type.read_group_black_list}" "txt" "input_read_group_black_list_${rg_black_list_count}" + #else + -p '--read_group_black_list "${rg_black_list.read_group_black_list_type.read_group_black_list}"' + #end if + #end for + #end if + + #if str( $reference_source.reference_source_selector ) == "history": + -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input" + #end if + ##end standard gatk options + + ##start analysis specific options + #if $cluster_snp_type.cluster_snp_type_selector == "cluster_snp": + -p ' + --clusterSize "${cluster_snp_type.cluster_size}" + --clusterWindowSize "${cluster_snp_type.cluster_window_size}" + ' + #end if + -p '${missing_values_in_expressions_should_evaluate_as_failing}' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Filters variant calls using a number of user-selectable, 
parameterizable criteria. + +For more information on using the VariantFiltration module, see this `tool specific page <http://www.broadinstitute.org/gsa/wiki/index.php/VariantFiltrationWalker>`_. + +To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_. + +If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_. + +------ + +**Inputs** + +GenomeAnalysisTK: VariantFiltration accepts a VCF input file. + + +**Outputs** + +The output is in VCF format. + + +Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats. + +------- + +**Settings**:: + + + filterExpression One or more expression used with INFO fields to filter (see wiki docs for more info) + filterName Names to use for the list of filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered + genotypeFilterExpression One or more expression used with FORMAT (sample/genotype-level) fields to filter (see wiki docs for more info) + genotypeFilterName Names to use for the list of sample/genotype filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered + clusterSize The number of SNPs which make up a cluster (see also --clusterWindowSize); [default:3] + clusterWindowSize The window size (in bases) in which to evaluate clustered SNPs (to disable the clustered SNP filter, set this value to less than 1); [default:0] + maskName The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask'] + missingValuesInExpressionsShouldEvaluateAsFailing When evaluating the JEXL expressions, should missing values be considered failing the expression (by default they are considered passing)? 
+ +------ + +**Citation** + +For the underlying tool, please cite `DePristo MA, Banks E, Poplin R, Garimella KV, Maguire JR, Hartl C, Philippakis AA, del Angel G, Rivas MA, Hanna M, McKenna A, Fennell TJ, Kernytsky AM, Sivachenko AY, Cibulskis K, Gabriel SB, Altshuler D, Daly MJ. A framework for variation discovery and genotyping using next-generation DNA sequencing data. Nat Genet. 2011 May;43(5):491-8. <http://www.ncbi.nlm.nih.gov/pubmed/21478889>`_ + +If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* + + + diff -r b138c5569231 -r 432aafa6830a variant_recalibrator.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_recalibrator.xml Wed Sep 12 17:54:46 2012 -0400 @@ -0,0 +1,715 @@ + + + + gatk + + gatk2_wrapper.py + --max_jvm_heap_fraction "1" + --stdout "${output_log}" + #for $var_count, $variant in enumerate( $reference_source.variants ): + -d "--input:input_${var_count},%(file_type)s" "${variant.input_variants}" "${variant.input_variants.ext}" "input_variants_${var_count}" + #end for + -p 'java + -jar "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/GenomeAnalysisTK.jar" + -T "VariantRecalibrator" + --num_threads 4 ##hard coded, for now + -et "NO_ET" -K "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/gatk2_key_file"##ET no phone home + ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout + #if $reference_source.reference_source_selector != "history": + -R "${reference_source.ref_file.fields.path}" + #end if + --recal_file "${output_recal}" + --tranches_file "${output_tranches}" + --rscript_file "${output_rscript}" + ' + + #set $rod_binding_names = dict() + #for $rod_binding in $rod_bind: + #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom': + #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name + #elif str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'comp': + #set $rod_bind_name = "comp" + $rod_binding.rod_bind_type.custom_rod_name + 
#else + #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector + #end if + #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1 + #if $rod_binding.rod_bind_type.rod_training_type.rod_training_type_selector == "not_training_truth_known": + -d "--resource:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" + #else: + -d "--resource:${rod_bind_name},%(file_type)s,known=${rod_binding.rod_bind_type.rod_training_type.known},training=${rod_binding.rod_bind_type.rod_training_type.training},truth=${rod_binding.rod_bind_type.rod_training_type.truth},bad=${rod_binding.rod_bind_type.rod_training_type.bad},prior=${rod_binding.rod_bind_type.rod_training_type.prior}" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" + #end if + #end for + + ##start standard gatk options + #if $gatk_param_type.gatk_param_type_selector == "advanced": + #for $pedigree in $gatk_param_type.pedigree: + -p '--pedigree "${pedigree.pedigree_file}"' + #end for + #for $pedigree_string in $gatk_param_type.pedigree_string_repeat: + -p '--pedigreeString "${pedigree_string.pedigree_string}"' + #end for + -p '--pedigreeValidationType "${gatk_param_type.pedigree_validation_type}"' + #for $read_filter in $gatk_param_type.read_filter: + -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}" + ###raise Exception( str( dir( $read_filter ) ) ) + #for $name, $param in $read_filter.read_filter_type.iteritems(): + #if $name not in [ "__current_case__", "read_filter_type_selector" ]: + #if hasattr( $param.input, 'truevalue' ): + ${param} + #else: + --${name} "${param}" + #end if + #end if + #end for + ' + #end for + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_interval_repeat ): + -d 
"--intervals" "${input_intervals.input_intervals}" "${input_intervals.input_intervals.ext}" "input_intervals_${interval_count}" + #end for + + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_exclude_interval_repeat ): + -d "--excludeIntervals" "${input_intervals.input_exclude_intervals}" "${input_intervals.input_exclude_intervals.ext}" "input_exlude_intervals_${interval_count}" + #end for + + -p '--interval_set_rule "${gatk_param_type.interval_set_rule}"' + + -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"' + #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE": + -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"' + #end if + -p ' + --baq "${gatk_param_type.baq}" + --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}" + ${gatk_param_type.use_original_qualities} + --defaultBaseQualities "${gatk_param_type.default_base_qualities}" + --validation_strictness "${gatk_param_type.validation_strictness}" + --interval_merging "${gatk_param_type.interval_merging}" + ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.non_deterministic_random_seed} + ' + #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): + #if $rg_black_list.read_group_black_list_type.read_group_black_list_type_selector == "file": + -d "--read_group_black_list" "${rg_black_list.read_group_black_list_type.read_group_black_list}" "txt" "input_read_group_black_list_${rg_black_list_count}" + #else + -p '--read_group_black_list "${rg_black_list.read_group_black_list_type.read_group_black_list}"' + #end if + #end for + #end if + + #if str( $reference_source.reference_source_selector ) == "history": + -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input" + #end if + ##end standard 
gatk options + + ##start analysis specific options + -p ' + #if str( $annotations ) != "None": + #for $annotation in str( $annotations.fields.gatk_value ).split( ',' ): + --use_annotation "${annotation}" + #end for + #end if + #for $additional_annotation in $additional_annotations: + --use_annotation "${additional_annotation.additional_annotation_name}" + #end for + --mode "${mode}" + ' + + #if $analysis_param_type.analysis_param_type_selector == "advanced": + -p ' + --maxGaussians "${analysis_param_type.max_gaussians}" + --maxIterations "${analysis_param_type.max_iterations}" + --numKMeans "${analysis_param_type.num_k_means}" + --stdThreshold "${analysis_param_type.std_threshold}" + --qualThreshold "${analysis_param_type.qual_threshold}" + --shrinkage "${analysis_param_type.shrinkage}" + --dirichlet "${analysis_param_type.dirichlet}" + --priorCounts "${analysis_param_type.prior_counts}" + #if str( $analysis_param_type.bad_variant_selector.bad_variant_selector_type ) == 'percent': + --percentBadVariants "${analysis_param_type.bad_variant_selector.percent_bad_variants}" + #else: + --minNumBadVariants "${analysis_param_type.bad_variant_selector.min_num_bad_variants}" + #end if + --target_titv "${analysis_param_type.target_titv}" + #for $tranche in [ $tranche.strip() for $tranche in str( $analysis_param_type.ts_tranche ).split( ',' ) if $tranche.strip() ] + --TStranche "${tranche}" + #end for + #for $ignore_filter in $analysis_param_type.ignore_filters: + #set $ignore_filter_name = str( $ignore_filter.ignore_filter_type.ignore_filter_type_selector ) + #if $ignore_filter_name == "custom": + #set $ignore_filter_name = str( $ignore_filter.ignore_filter_type.filter_name ) + #end if + --ignore_filter "${ignore_filter_name}" + #end for + --ts_filter_level "${analysis_param_type.ts_filter_level}" + ' + #end if + + + && + mv "${output_rscript}.pdf" "${output_tranches_pdf}" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Takes variant calls as .vcf files, learns a Gaussian mixture model over the variant annotations and evaluates the variant -- assigning an informative lod score + +For more information on using the VariantRecalibrator module, see this `tool specific page <http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration>`_. + +To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_. + +If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_. + +------ + +**Inputs** + +GenomeAnalysisTK: VariantRecalibrator accepts a variant input file. + + +**Outputs** + +The output is in VCF format. + + +Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats. 
+ +------- + +**Settings**:: + + + tranches_file The output tranches file used by ApplyRecalibration + use_annotation The names of the annotations which should be used for calculations + mode Recalibration mode to employ: 1.) SNP for recalibrating only snps (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both snps and indels simultaneously. (SNP|INDEL|BOTH) + maxGaussians The maximum number of Gaussians to try during variational Bayes algorithm + maxIterations The maximum number of VBEM iterations to be performed in variational Bayes algorithm. Procedure will normally end when convergence is detected. + numKMeans The number of k-means iterations to perform in order to initialize the means of the Gaussians in the Gaussian mixture model. + stdThreshold If a variant has annotations more than -std standard deviations away from mean then don't use it for building the Gaussian mixture model. + qualThreshold If a known variant has raw QUAL value less than -qual then don't use it for building the Gaussian mixture model. + shrinkage The shrinkage parameter in variational Bayes algorithm. + dirichlet The dirichlet parameter in variational Bayes algorithm. + priorCounts The number of prior counts to use in variational Bayes algorithm. + percentBadVariants What percentage of the worst scoring variants to use when building the Gaussian mixture model of bad variants. 0.07 means bottom 7 percent. + minNumBadVariants The minimum amount of worst scoring variants to use when building the Gaussian mixture model of bad variants. Will override -percentBad argument if necessary. + recal_file The output recal file used by ApplyRecalibration + target_titv The expected novel Ti/Tv ratio to use when calculating FDR tranches and for display on optimization curve output figures. (approx 2.15 for whole genome experiments). ONLY USED FOR PLOTTING PURPOSES! 
+ TStranche The levels of novel false discovery rate (FDR, implied by ti/tv) at which to slice the data. (in percent, that is 1.0 for 1 percent) + ignore_filter If specified the optimizer will use variants even if the specified filter name is marked in the input VCF file + path_to_Rscript The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript + rscript_file The output rscript file generated by the VQSR to aid in visualization of the input data and learned model + path_to_resources Path to resources folder holding the Sting R scripts. + ts_filter_level The truth sensitivity level at which to start filtering, used here to indicate filtered variants in plots + +------ + +**Citation** + +For the underlying tool, please cite `DePristo MA, Banks E, Poplin R, Garimella KV, Maguire JR, Hartl C, Philippakis AA, del Angel G, Rivas MA, Hanna M, McKenna A, Fennell TJ, Kernytsky AM, Sivachenko AY, Cibulskis K, Gabriel SB, Altshuler D, Daly MJ. A framework for variation discovery and genotyping using next-generation DNA sequencing data. Nat Genet. 2011 May;43(5):491-8. <http://www.ncbi.nlm.nih.gov/pubmed/21478889>`_ + +If you use this tool in Galaxy, please cite Blankenberg D, et al. 
*In preparation.* + + + diff -r b138c5569231 -r 432aafa6830a variant_select.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_select.xml Wed Sep 12 17:54:46 2012 -0400 @@ -0,0 +1,574 @@ + + from VCF files + + gatk + + gatk2_wrapper.py + #from binascii import hexlify + --max_jvm_heap_fraction "1" + --stdout "${output_log}" + -d "--variant:variant,%(file_type)s" "${reference_source.input_variant}" "${reference_source.input_variant.ext}" "input_variant" + -p 'java + -jar "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/GenomeAnalysisTK.jar" + -T "SelectVariants" + --num_threads 4 ##hard coded, for now + -et "NO_ET" -K "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/gatk2_key_file" ##ET no phone home + -o "${output_vcf}" + + #if $reference_source.reference_source_selector != "history": + -R "${reference_source.ref_file.fields.path}" + #end if + ' + -p ' + #if $input_concordance: + --concordance "${input_concordance}" + #end if + #if $input_discordance: + --discordance "${input_discordance}" + #end if + + #for $exclude_sample_name in $exclude_sample_name_repeat: + --exclude_sample_name "${exclude_sample_name.exclude_sample_name}" + #end for + + ${exclude_filtered} + + #for $sample_name in $sample_name_repeat: + --sample_name "${sample_name.sample_name}" + #end for + + ' + + #for $select_expressions in $select_expressions_repeat: + #set $select_expression = "--select_expressions '%s'" % ( str( $select_expressions.select_expressions ) ) + -o '${ hexlify( $select_expression ) }' + #end for + + ##start tool specific options + #if str( $analysis_param_type.analysis_param_type_selector ) == 'advanced': + -p ' + #for $exclude_sample_file in $analysis_param_type.exclude_sample_file_repeat: + --exclude_sample_file "${exclude_sample_file.exclude_sample_file}" + #end for + + #for $sample_file in $analysis_param_type.sample_file_repeat: + --sample_file "${sample_file.sample_file}" + #end for + + #if $analysis_param_type.input_keep_ids: + --keepIDs 
"${analysis_param_type.input_keep_ids}" + #end if + + ${analysis_param_type.keep_original_AC} + + ${analysis_param_type.mendelian_violation} + + --mendelianViolationQualThreshold "${analysis_param_type.mendelian_violation_qual_threshold}" + + --remove_fraction_genotypes "${analysis_param_type.remove_fraction_genotypes}" + + --restrictAllelesTo "${analysis_param_type.restrict_alleles_to}" + + #if str( $analysis_param_type.select_random_type.select_random_type_selector ) == 'select_random_fraction': + --select_random_fraction "${analysis_param_type.select_random_type.select_random_fraction}" + #elif str( $analysis_param_type.select_random_type.select_random_type_selector ) == 'select_random_number': + --select_random_number "${analysis_param_type.select_random_type.select_random_number}" + #end if + + #if $analysis_param_type.select_type_to_include: + #for $type_to_include in str( $analysis_param_type.select_type_to_include ).split( ',' ): + --selectTypeToInclude "${type_to_include}" + #end for + #end if + + ${analysis_param_type.exclude_non_variants} + ' + + #for $sample_expressions in $analysis_param_type.sample_expressions_repeat: + #set $sample_expression = "--sample_expressions '%s'" % ( str( $sample_expressions.sample_expressions ) ) + -o '${ hexlify( $sample_expression ) }' + #end for + + #end if + ##end tool specific options + + ##start standard gatk options + #if $gatk_param_type.gatk_param_type_selector == "advanced": + #for $pedigree in $gatk_param_type.pedigree: + -p '--pedigree "${pedigree.pedigree_file}"' + #end for + #for $pedigree_string in $gatk_param_type.pedigree_string_repeat: + -p '--pedigreeString "${pedigree_string.pedigree_string}"' + #end for + -p '--pedigreeValidationType "${gatk_param_type.pedigree_validation_type}"' + #for $read_filter in $gatk_param_type.read_filter: + -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}" + ###raise Exception( str( dir( $read_filter ) ) ) + #for $name, $param in 
$read_filter.read_filter_type.iteritems(): + #if $name not in [ "__current_case__", "read_filter_type_selector" ]: + #if hasattr( $param.input, 'truevalue' ): + ${param} + #else: + --${name} "${param}" + #end if + #end if + #end for + ' + #end for + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_interval_repeat ): + -d "--intervals" "${input_intervals.input_intervals}" "${input_intervals.input_intervals.ext}" "input_intervals_${interval_count}" + #end for + + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_exclude_interval_repeat ): + -d "--excludeIntervals" "${input_intervals.input_exclude_intervals}" "${input_intervals.input_exclude_intervals.ext}" "input_exlude_intervals_${interval_count}" + #end for + + -p '--interval_set_rule "${gatk_param_type.interval_set_rule}"' + + -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"' + #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE": + -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"' + #end if + -p ' + --baq "${gatk_param_type.baq}" + --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}" + ${gatk_param_type.use_original_qualities} + --defaultBaseQualities "${gatk_param_type.default_base_qualities}" + --validation_strictness "${gatk_param_type.validation_strictness}" + --interval_merging "${gatk_param_type.interval_merging}" + ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.non_deterministic_random_seed} + ' + #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): + #if $rg_black_list.read_group_black_list_type.read_group_black_list_type_selector == "file": + -d "--read_group_black_list" "${rg_black_list.read_group_black_list_type.read_group_black_list}" "txt" 
"input_read_group_black_list_${rg_black_list_count}" + #else + -p '--read_group_black_list "${rg_black_list.read_group_black_list_type.read_group_black_list}"' + #end if + #end for + #end if + + #if str( $reference_source.reference_source_selector ) == "history": + -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input" + #end if + ##end standard gatk options + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Often, a VCF containing many samples and/or variants will need to be subset in order to facilitate certain analyses (e.g. comparing and contrasting cases vs. controls; extracting variant or non-variant loci that meet certain requirements, displaying just a few samples in a browser like IGV, etc.). SelectVariants can be used for this purpose. Given a single VCF file, one or more samples can be extracted from the file (based on a complete sample name or a pattern match). Variants can be further selected by specifying criteria for inclusion, i.e. "DP > 1000" (depth of coverage greater than 1000x), "AF < 0.25" (sites with allele frequency less than 0.25). These JEXL expressions are documented in the Using JEXL expressions section (http://www.broadinstitute.org/gsa/wiki/index.php/Using_JEXL_expressions). 
One can optionally include concordance or discordance tracks for use in selecting overlapping variants. + +For more information on using the SelectVariants module, see this `tool specific page <http://www.broadinstitute.org/gsa/wiki/index.php/SelectVariants>`_. + +To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_. + +If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_. + +------ + +**Inputs** + +GenomeAnalysisTK: SelectVariants accepts a VCF input file. + + +**Outputs** + +The output is in VCF format. + + +Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats. + +------- + +**Settings**:: + + + out VCFWriter stdout File to which variants should be written + variant RodBinding[VariantContext] NA Input VCF file + concordance RodBinding[VariantContext] none Output variants that were also called in this comparison track + discordance RodBinding[VariantContext] none Output variants that were not called in this comparison track + exclude_sample_file Set[File] [] File containing a list of samples (one per line) to exclude. Can be specified multiple times + exclude_sample_name Set[String] [] Exclude genotypes from this sample. 
Can be specified multiple times + excludeFiltered boolean false Don't include filtered loci in the analysis + excludeNonVariants boolean false Don't include loci found to be non-variant after the subsetting procedure + keepIDs File NA Only emit sites whose ID is found in this file (one ID per line) + keepOriginalAC boolean false Don't update the AC, AF, or AN values in the INFO field after selecting + mendelianViolation Boolean false Output Mendelian violation sites only + mvq double 0.0 Minimum genotype QUAL score for each trio member required to accept a site as a violation + remove_fraction_genotypes double 0.0 Selects a fraction (a number between 0 and 1) of the total genotypes at random from the variant track and sets them to no-call + restrictAllelesTo NumberAlleleRestriction ALL Select only variants of a particular allelicity. Valid options are ALL (default), MULTIALLELIC or BIALLELIC + sample_expressions Set[String] NA Regular expression to select many samples from the ROD tracks provided. Can be specified multiple times + sample_file Set[File] NA File containing a list of samples (one per line) to include. Can be specified multiple times + sample_name Set[String] [] Include genotypes from this sample. Can be specified multiple times + select_expressions ArrayList[String] [] One or more criteria to use when selecting the data + select_random_fraction double 0.0 Selects a fraction (a number between 0 and 1) of the total variants at random from the variant track + select_random_number int 0 Selects a number of variants at random from the variant track + selectTypeToInclude List[Type] [] Select only a certain type of variants from the input file. Valid types are INDEL, SNP, MIXED, MNP, SYMBOLIC, NO_VARIATION.
Can be specified multiple times + +------ + +**Citation** + +For the underlying tool, please cite `DePristo MA, Banks E, Poplin R, Garimella KV, Maguire JR, Hartl C, Philippakis AA, del Angel G, Rivas MA, Hanna M, McKenna A, Fennell TJ, Kernytsky AM, Sivachenko AY, Cibulskis K, Gabriel SB, Altshuler D, Daly MJ. A framework for variation discovery and genotyping using next-generation DNA sequencing data. Nat Genet. 2011 May;43(5):491-8. <http://www.ncbi.nlm.nih.gov/pubmed/21478889>`_ + +If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* + + + diff -r b138c5569231 -r 432aafa6830a variants_validate.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variants_validate.xml Wed Sep 12 17:54:46 2012 -0400 @@ -0,0 +1,398 @@ + + + + gatk + + gatk2_wrapper.py + --max_jvm_heap_fraction "1" + --stdout "${output_log}" + -d "--variant:variant,%(file_type)s" "${reference_source.input_variant}" "${reference_source.input_variant.ext}" "input_variant" + -p 'java + -jar "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/GenomeAnalysisTK.jar" + -T "ValidateVariants" + + -et "NO_ET" -K "/data/galaxy/galaxy3/tool-data/shared/jars/gatk2/gatk2_key_file" ##ET no phone home + ##--num_threads 4 ##hard coded, for now + ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout + #if $reference_source.reference_source_selector != "history": + -R "${reference_source.ref_file.fields.path}" + #end if + ${warn_on_errors} + ${do_not_validate_filtered_records} + ' + + #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp': + -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}" + #end if + + ##start standard gatk options + #if $gatk_param_type.gatk_param_type_selector == "advanced": + #for $pedigree in $gatk_param_type.pedigree: + -p '--pedigree 
"${pedigree.pedigree_file}"' + #end for + #for $pedigree_string in $gatk_param_type.pedigree_string_repeat: + -p '--pedigreeString "${pedigree_string.pedigree_string}"' + #end for + -p '--pedigreeValidationType "${gatk_param_type.pedigree_validation_type}"' + #for $read_filter in $gatk_param_type.read_filter: + -p '--read_filter "${read_filter.read_filter_type.read_filter_type_selector}" + ###raise Exception( str( dir( $read_filter ) ) ) + #for $name, $param in $read_filter.read_filter_type.iteritems(): + #if $name not in [ "__current_case__", "read_filter_type_selector" ]: + #if hasattr( $param.input, 'truevalue' ): + ${param} + #else: + --${name} "${param}" + #end if + #end if + #end for + ' + #end for + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_interval_repeat ): + -d "--intervals" "${input_intervals.input_intervals}" "${input_intervals.input_intervals.ext}" "input_intervals_${interval_count}" + #end for + + #for $interval_count, $input_intervals in enumerate( $gatk_param_type.input_exclude_interval_repeat ): + -d "--excludeIntervals" "${input_intervals.input_exclude_intervals}" "${input_intervals.input_exclude_intervals.ext}" "input_exlude_intervals_${interval_count}" + #end for + + -p '--interval_set_rule "${gatk_param_type.interval_set_rule}"' + + -p '--downsampling_type "${gatk_param_type.downsampling_type.downsampling_type_selector}"' + #if str( $gatk_param_type.downsampling_type.downsampling_type_selector ) != "NONE": + -p '--${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_type_selector} "${gatk_param_type.downsampling_type.downsample_to_type.downsample_to_value}"' + #end if + -p ' + --baq "${gatk_param_type.baq}" + --baqGapOpenPenalty "${gatk_param_type.baq_gap_open_penalty}" + ${gatk_param_type.use_original_qualities} + --defaultBaseQualities "${gatk_param_type.default_base_qualities}" + --validation_strictness "${gatk_param_type.validation_strictness}" + --interval_merging 
"${gatk_param_type.interval_merging}" + ${gatk_param_type.disable_experimental_low_memory_sharding} + ${gatk_param_type.non_deterministic_random_seed} + ' + #for $rg_black_list_count, $rg_black_list in enumerate( $gatk_param_type.read_group_black_list_repeat ): + #if $rg_black_list.read_group_black_list_type.read_group_black_list_type_selector == "file": + -d "--read_group_black_list" "${rg_black_list.read_group_black_list_type.read_group_black_list}" "txt" "input_read_group_black_list_${rg_black_list_count}" + #else + -p '--read_group_black_list "${rg_black_list.read_group_black_list_type.read_group_black_list}"' + #end if + #end for + #end if + + #if $reference_source.reference_source_selector == "history": + -d "-R" "${reference_source.ref_file}" "${reference_source.ref_file.ext}" "gatk_input" + #end if + ##end standard gatk options + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Validates a variants file. + +For more information on using the ValidateVariants module, see this `tool specific page <http://www.broadinstitute.org/gsa/wiki/index.php/VariantValidator>`_. + +To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_. + +If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_. 
+ +------ + +**Inputs** + +GenomeAnalysisTK: ValidateVariants accepts variant files as input. + + +**Outputs** + +The output is a log of variant validation. + + +Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats. + +------- + +**Settings**:: + + doNotValidateFilteredRecords Skip validation of filtered records + warnOnErrors Emit warnings on errors instead of terminating the run + +------ + +**Citation** + +For the underlying tool, please cite `DePristo MA, Banks E, Poplin R, Garimella KV, Maguire JR, Hartl C, Philippakis AA, del Angel G, Rivas MA, Hanna M, McKenna A, Fennell TJ, Kernytsky AM, Sivachenko AY, Cibulskis K, Gabriel SB, Altshuler D, Daly MJ. A framework for variation discovery and genotyping using next-generation DNA sequencing data. Nat Genet. 2011 May;43(5):491-8. <http://www.ncbi.nlm.nih.gov/pubmed/21478889>`_ + +If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* + + +