Mercurial > repos > devteam > samtools_mpileup
changeset 2:3aa48bcbc599 draft
Uploaded tarball for 0.0.3 version.
author | devteam |
---|---|
date | Wed, 12 Mar 2014 12:53:30 -0400 |
parents | b47a418ccfdc |
children | da0203c3461a |
files | samtools_mpileup.xml samtools_wrapper.py test-data/gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam test-data/samtools/mpileup/samtools_mpileup_out_1.log test-data/samtools/mpileup/samtools_mpileup_out_1.pileup test-data/samtools/mpileup/samtools_mpileup_out_2.bcf test-data/samtools_mpileup_in_1.bam test-data/samtools_mpileup_in_3.bam test-data/samtools_mpileup_out_1.log test-data/samtools_mpileup_out_1.pileup test-data/samtools_mpileup_out_2.bcf test-data/samtools_mpileup_out_2.log test-data/samtools_mpileup_out_3.log test-data/samtools_mpileup_out_4.bcf test-data/samtools_mpileup_out_4.log tool-data/sam_fa_indices.loc.sample tool-data/tool_data_table_conf.xml.sample tool_dependencies.xml |
diffstat | 18 files changed, 570 insertions(+), 356 deletions(-) [+] |
line wrap: on
line diff
--- a/samtools_mpileup.xml Wed Mar 12 12:52:52 2014 -0400 +++ b/samtools_mpileup.xml Wed Mar 12 12:53:30 2014 -0400 @@ -1,30 +1,46 @@ -<tool id="samtools_mpileup" name="MPileup" version="0.0.2"> - <description>SNP and indel caller</description> - <requirements> - <requirement type="package" version="0.1.18">samtools</requirement> - </requirements> - <command interpreter="python">samtools_wrapper.py - -p 'samtools mpileup' - --stdout "${output_log}" +<tool id="samtools_mpileup" name="MPileup" version="0.0.3"> + <description>SNP and indel caller</description> + <requirements> + <requirement type="package" version="0.1.19">samtools</requirement> + </requirements> + <command><![CDATA[ + #if $reference_source.reference_source_selector == "history": + ln -s "${reference_source.ref_file}" && samtools faidx `basename "${reference_source.ref_file}"` && samtools mpileup + #else: + samtools mpileup + #end if #if $reference_source.reference_source_selector != "history": - -p '-f "${reference_source.ref_file.fields.path}"' + -f "${reference_source.ref_file.fields.path}" #else: - -d "-f" "${reference_source.ref_file}" "fa" "reference_input" + -f "${reference_source.ref_file}" #end if #for $i, $input_bam in enumerate( $reference_source.input_bams ): - -d " " "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "bam_input_${i}" - -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "bam_input_${i}" ##hardcode galaxy ext type as bam_index + $input_bam.input_bam #end for - -p ' #if str( $advanced_options.advanced_options_selector ) == "advanced": - ${advanced_options.skip_anomalous_read_pairs} + #if str( $advanced_options.filter_by_flags.filter_flags ) == "filter": + #if $advanced_options.filter_by_flags.require_flags: + --rf ${sum([int(flag) for flag in str($advanced_options.filter_by_flags.require_flags).split(',')])} + #end if + #if $advanced_options.filter_by_flags.exclude_flags: + --ff ${sum([int(flag) for flag in str($advanced_options.filter_by_flags.exclude_flags).split(',')])} + #end if + #end if + #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste": + -l "$pasted_regions" + #elif str( $advanced_options.limit_by_region.limit_by_regions ) == "history" + -l "$bed_regions" + #end if + #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste": + -G "$excluded_read_groups" + #elif str( $advanced_options.exclude_read_group.exclude_read_groups ) == "history" + -G "$read_groups" + #end if + ${advanced_options.skip_anomalous_read_pairs} ${advanced_options.disable_probabilistic_realignment} -C "${advanced_options.coefficient_for_downgrading}" -d "${advanced_options.max_reads_per_bam}" ${advanced_options.extended_BAQ_computation} - #if str( $advanced_options.position_list ) != 'None': - -l "${advanced_options.position_list}" - #end if -q "${advanced_options.minimum_mapping_quality}" -Q "${advanced_options.minimum_base_quality}" #if str( $advanced_options.region_string ): @@ -40,6 +56,9 @@ -h "${genotype_likelihood_computation_type.coefficient_for_modeling_homopolymer_errors}" #if str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'perform_indel_calling': -L "${genotype_likelihood_computation_type.perform_indel_calling.skip_indel_calling_above_sample_depth}" + -m "${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_reads_for_indel_candidates}" + -F "${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_read_fraction}" + ${genotype_likelihood_computation_type.perform_indel_calling.gapped_read_per_sample} #else: -I #end if @@ -47,159 +66,428 @@ #if len( $genotype_likelihood_computation_type.platform_list_repeat ): -P "${ ",".join( [ str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.platform_list_repeat ] ) }" #end if + #else: + ${genotype_likelihood_computation_type.base_position_on_reads} + ${genotype_likelihood_computation_type.output_mapping_quality} #end if - > "${output_mpileup}" - ' - </command> - <inputs> - <conditional name="reference_source"> - <param name="reference_source_selector" type="select" label="Choose the source for the reference list"> - <option value="cached">Locally cached</option> - <option value="history">History</option> - </param> - <when value="cached"> - <repeat name="input_bams" title="BAM file" min="1"> - <param name="input_bam" type="data" format="bam" label="BAM file"> - <validator type="unspecified_build" /> - <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select --> - </param> - </repeat> - <param name="ref_file" type="select" label="Using reference genome"> - <options from_data_table="fasta_indexes"> - <!-- <filter type="data_meta" ref="input_bam" key="dbkey" column="1" /> does not yet work in a repeat...--> - </options> - </param> - </when> - <when value="history"> <!-- FIX ME!!!! --> - <repeat name="input_bams" title="BAM file" min="1"> - <param name="input_bam" type="data" format="bam" label="BAM file"> - <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." /> - </param> - </repeat> - <param name="ref_file" type="data" format="fasta" label="Using reference file" /> - </when> - </conditional> - - - <conditional name="genotype_likelihood_computation_type"> - <param name="genotype_likelihood_computation_type_selector" type="select" label="Genotype Likelihood Computation"> - <option value="perform_genotype_likelihood_computation">Perform genotype likelihood computation</option> - <option value="do_not_perform_genotype_likelihood_computation" selected="True">Do not perform genotype likelihood computation</option> - </param> - <when value="perform_genotype_likelihood_computation"> - <param name="gap_extension_sequencing_error_probability" type="integer" value="20" label="Phred-scaled gap extension sequencing error probability" /> - <param name="coefficient_for_modeling_homopolymer_errors" type="integer" value="100" label="Coefficient for modeling homopolymer errors." /> - <conditional name="perform_indel_calling"> - <param name="perform_indel_calling_selector" type="select" label="Perform INDEL calling"> - <option value="perform_indel_calling" selected="True">Perform INDEL calling</option> - <option value="do_not_perform_indel_calling">Do not perform INDEL calling</option> + > "$output_mpileup" 2> "$output_log" + ]]></command> + <stdio> + <exit_code range="1:" level="fatal" description="Error" /> + </stdio> + <inputs> + <conditional name="reference_source"> + <param name="reference_source_selector" type="select" label="Choose the source for the reference list"> + <option value="cached">Locally cached</option> + <option value="history">History</option> </param> - <when value="perform_indel_calling"> - <param name="skip_indel_calling_above_sample_depth" type="integer" value="250" label="Skip INDEL calling if the average per-sample depth is above" /> + <when value="cached"> + <repeat name="input_bams" title="BAM file" min="1"> + <param name="input_bam" type="data" format="bam" label="BAM file"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select --> + </param> + </repeat> + <param name="ref_file" type="select" label="Using reference genome"> + <options from_data_table="fasta_indexes" /> + <!-- <filter type="data_meta" ref="input_bam" key="dbkey" column="1" /> does not yet work in a repeat...--> + </param> + </when> + <when value="history"> + <repeat name="input_bams" title="BAM file" min="1"> + <param name="input_bam" type="data" format="bam" label="BAM file"> + <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." /> + </param> + </repeat> + <param name="ref_file" type="data" format="fasta" label="Using reference file" /> + </when> + </conditional> + <conditional name="genotype_likelihood_computation_type"> + <param name="genotype_likelihood_computation_type_selector" type="select" label="Genotype Likelihood Computation"> + <option value="perform_genotype_likelihood_computation">Perform genotype likelihood computation</option> + <option value="do_not_perform_genotype_likelihood_computation" selected="True">Do not perform genotype likelihood computation</option> + </param> + <when value="perform_genotype_likelihood_computation"> + <param name="gap_extension_sequencing_error_probability" type="integer" value="20" label="Phred-scaled gap extension sequencing error probability" /> + <param name="coefficient_for_modeling_homopolymer_errors" type="integer" value="100" label="Coefficient for modeling homopolymer errors." /> + <conditional name="perform_indel_calling"> + <param name="perform_indel_calling_selector" type="select" label="Perform INDEL calling"> + <option value="perform_indel_calling" selected="True">Perform INDEL calling</option> + <option value="do_not_perform_indel_calling">Do not perform INDEL calling</option> + </param> + <when value="perform_indel_calling"> + <param name="skip_indel_calling_above_sample_depth" type="integer" value="250" label="Skip INDEL calling if the average per-sample depth is above" /> + <param name="minimum_gapped_reads_for_indel_candidates" type="integer" value="1" label="Minimum gapped reads for indel candidates" /> + <param name="minimum_gapped_read_fraction" type="float" value="0.002" label="Minimum fraction of gapped reads for candidates" /> + <param name="gapped_read_per_sample" type="boolean" truevalue="-p" falsevalue="" checked="False" label="Apply minimum values on a per-sample basis" /> + </when> + <when value="do_not_perform_indel_calling" /> + </conditional> + <param name="gap_open_sequencing_error_probability" type="integer" value="40" label="Phred-scaled gap open sequencing error probability" /> + <repeat name="platform_list_repeat" title="Platform for INDEL candidates"> + <param name="platform_entry" type="text" value="" label="Platform to use for INDEL candidates" /> + </repeat> + </when> + <when value="do_not_perform_genotype_likelihood_computation"> + <param name="base_position_on_reads" type="boolean" truevalue="-O" falsevalue="" checked="False" label="Output base positions on reads" /> + <param name="output_mapping_quality" type="boolean" truevalue="-s" falsevalue="" checked="False" label="Output mapping quality" /> </when> - <when value="do_not_perform_indel_calling" /> - </conditional> - <param name="gap_open_sequencing_error_probability" type="integer" value="40" label="Phred-scaled gap open sequencing error probability" /> - <repeat name="platform_list_repeat" title="Platform for INDEL candidates"> - <param name="platform_entry" type="text" value="" label="Platform to use for INDEL candidates" /> - </repeat> - </when> - <when value="do_not_perform_genotype_likelihood_computation"> - <!-- Do nothing here --> - </when> - </conditional> - <conditional name="advanced_options"> - <param name="advanced_options_selector" type="select" label="Set advanced options"> - <option value="basic" selected="True">Basic</option> - <option value="advanced">Advanced</option> - </param> - <when value="advanced"> - <param name="skip_anomalous_read_pairs" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Do not skip anomalous read pairs in variant calling" /> - <param name="disable_probabilistic_realignment" type="boolean" truevalue="-B" falsevalue="" checked="False" label=" Disable probabilistic realignment for the computation of base alignment quality (BAQ)" /> - <param name="coefficient_for_downgrading" type="integer" value="0" label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" /> - <param name="max_reads_per_bam" type="integer" value="250" label="Max reads per BAM" /> - <param name="extended_BAQ_computation" type="boolean" truevalue="-E" falsevalue="" checked="False" label="Extended BAQ computation" /> - <param name="position_list" type="data" format="bed" label="List of regions or sites on which to operate" optional="True" /> - <param name="minimum_mapping_quality" type="integer" value="0" label="Minimum mapping quality for an alignment to be used" /> - <param name="minimum_base_quality" type="integer" value="13" label="Minimum base quality for a base to be considered" /> - <param name="region_string" type="text" value="" label="Only generate pileup in region" /> - <param name="output_per_sample_read_depth" type="boolean" truevalue="-D" falsevalue="" checked="False" label="Output per-sample read depth" /> - <param name="output_per_sample_strand_bias_p_value" type="boolean" truevalue="-S" falsevalue="" checked="False" label="Output per-sample Phred-scaled strand bias P-value" /> - </when> - <when value="basic" /> - </conditional> - </inputs> - <outputs> - <data format="pileup" name="output_mpileup" label="${tool.name} on ${on_string}"> - <change_format> - <when input="genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" format="bcf" /> - </change_format> - </data> - <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" /> - </outputs> - <tests> - <test> - <param name="reference_source_selector" value="history" /> - <param name="ref_file" value="phiX.fasta" ftype="fasta" /> - <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" /> - <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" /> - <param name="advanced_options_selector" value="basic" /> - <output name="output_mpileup" file="samtools/mpileup/samtools_mpileup_out_1.pileup" /> - <output name="output_log" file="samtools/mpileup/samtools_mpileup_out_1.log" /> - </test> - <test> - <param name="reference_source_selector" value="history" /> - <param name="ref_file" value="phiX.fasta" ftype="fasta" /> - <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" /> - <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" /> - <param name="gap_extension_sequencing_error_probability" value="20" /> - <param name="coefficient_for_modeling_homopolymer_errors" value="100" /> - <param name="perform_indel_calling_selector" value="perform_indel_calling" /> - <param name="skip_indel_calling_above_sample_depth" value="250" /> - <param name="gap_open_sequencing_error_probability" value="40" /> - <param name="platform_list_repeat" value="0" /> - <param name="advanced_options_selector" value="basic" /> - <output name="output_mpileup" file="samtools/mpileup/samtools_mpileup_out_2.bcf" /> - <output name="output_log" file="samtools/mpileup/samtools_mpileup_out_1.log" /> - </test> - </tests> - <help> + </conditional> + <conditional name="advanced_options"> + <param name="advanced_options_selector" type="select" label="Set advanced options"> + <option value="basic" selected="True">Basic</option> + <option value="advanced">Advanced</option> + </param> + <when value="advanced"> + <conditional name="filter_by_flags"> + <param name="filter_flags" type="select" label="Set filter by flags"> + <option value="nofilter" selected="True">Do not filter</option> + <option value="filter">Filter by flags to exclude or require</option> + </param> + <when value="filter"> + <param name="require_flags" type="select" display="checkboxes" multiple="True" label="Require"> + <option value="1">Read is paired</option> + <option value="2">Read is mapped in a proper pair</option> + <option value="4">The read is unmapped</option> + <option value="8">The mate is unmapped</option> + <option value="16">Read strand</option> + <option value="32">Mate strand</option> + <option value="64">Read is the first in a pair</option> + <option value="128">Read is the second in a pair</option> + <option value="256">The alignment or this read is not primary</option> + <option value="512">The read fails platform/vendor quality checks</option> + <option value="1024">The read is a PCR or optical duplicate</option> + </param> + <param name="exclude_flags" type="select" display="checkboxes" multiple="True" label="Exclude"> + <option value="1">Read is paired</option> + <option value="2">Read is mapped in a proper pair</option> + <option value="4">The read is unmapped</option> + <option value="8">The mate is unmapped</option> + <option value="16">Read strand</option> + <option value="32">Mate strand</option> + <option value="64">Read is the first in a pair</option> + <option value="128">Read is the second in a pair</option> + <option value="256">The alignment or this read is not primary</option> + <option value="512">The read fails platform/vendor quality checks</option> + <option value="1024">The read is a PCR or optical duplicate</option> + </param> + </when> + <when value="nofilter" /> + </conditional> + <conditional name="limit_by_region"> + <param name="limit_by_regions" type="select" label="Select regions to call"> + <option value="no_limit" selected="True">Do not limit</option> + <option value="history">From an uploaded BED file</option> + <option value="paste">Paste a list of regions or BED</option> + </param> + <when value="history"> + <param name="bed_regions" type="data" format="bed" label="BED file"> + <validator type="dataset_ok_validator" /> + </param> + </when> + <when value="paste"> + <param name="region_paste" type="text" area="true" size="10x35" label="Regions" help="Paste a list of regions in BED format or as a list of chromosomes and positions"/> + </when> + <when value="no_limit" /> + </conditional> + <conditional name="exclude_read_group"> + <param name="exclude_read_groups" type="select" label="Select read groups to exclude"> + <option value="no_limit" selected="True">Do not exclude</option> + <option value="history">From an uploaded text file</option> + <option value="paste">Paste a list of read groups</option> + </param> + <when value="history"> + <param name="read_groups" type="data" format="txt" label="Text file"> + <validator type="dataset_ok_validator" /> + </param> + </when> + <when value="paste"> + <param name="group_paste" type="text" area="true" size="10x35" label="Read groups" help="Paste a list of read groups"/> + </when> + <when value="no_limit" /> + </conditional> + <param name="skip_anomalous_read_pairs" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Do not skip anomalous read pairs in variant calling" /> + <param name="disable_probabilistic_realignment" type="boolean" truevalue="-B" falsevalue="" checked="False" label=" Disable probabilistic realignment for the computation of base alignment quality (BAQ)" /> + <param name="coefficient_for_downgrading" type="integer" value="0" label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" /> + <param name="max_reads_per_bam" type="integer" value="250" label="Max reads per BAM" /> + <param name="extended_BAQ_computation" type="boolean" truevalue="-E" falsevalue="" checked="False" label="Extended BAQ computation" /> + <param name="minimum_mapping_quality" type="integer" value="0" label="Minimum mapping quality for an alignment to be used" /> + <param name="minimum_base_quality" type="integer" value="13" label="Minimum base quality for a base to be considered" /> + <param name="region_string" type="text" value="" label="Only generate pileup in region" /> + <param name="output_per_sample_read_depth" type="boolean" truevalue="-D" falsevalue="" checked="False" label="Output per-sample read depth" /> + <param name="output_per_sample_strand_bias_p_value" type="boolean" truevalue="-S" falsevalue="" checked="False" label="Output per-sample Phred-scaled strand bias P-value" /> + </when> + <when value="basic" /> + </conditional> + </inputs> + <configfiles> + <configfile name="excluded_read_groups"> +<![CDATA[ +<% +import re +%> +#set pasted_data = '' +#if str( $advanced_options.advanced_options_selector ) == "advanced": + #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste": + #set regex=re.compile("\\s+") + #set pasted_data = '\t'.join( regex.split( str( $advanced_options.exclude_read_group['read_groups'] ) ) ) + #end if +#end if +${pasted_data} +]]> + </configfile> + <configfile name="pasted_regions"> +<![CDATA[ +<% +import re +%> +#set pasted_data = '' +#if str( $advanced_options.advanced_options_selector ) == "advanced": + #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste": + #set regex=re.compile("\\s+") + #set pasted_data = '\t'.join( regex.split( str( $advanced_options.limit_by_region['region_paste'] ) ) ) + #end if +#end if +${pasted_data} +]]> + </configfile> + </configfiles> + <outputs> + <data format="pileup" name="output_mpileup" label="${tool.name} on ${on_string}"> + <change_format> + <when input="genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" format="bcf" /> + </change_format> + </data> + <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" /> + </outputs> + <tests> + <test> + <param name="reference_source_selector" value="history" /> + <param name="ref_file" value="phiX.fasta" ftype="fasta" /> + <param name="input_bam" value="samtools_mpileup_in_1.bam" ftype="bam" /> + <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" /> + <param name="advanced_options_selector" value="basic" /> + <param name="base_position_on_reads" value="true" /> + <param name="output_mapping_quality" value="true" /> + <output name="output_mpileup" file="samtools_mpileup_out_1.pileup" /> + <output name="output_log" file="samtools_mpileup_out_1.log" /> + </test> + <test> + <param name="reference_source_selector" value="history" /> + <param name="ref_file" value="phiX.fasta" ftype="fasta" /> + <param name="input_bam" value="samtools_mpileup_in_1.bam" ftype="bam" /> + <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" /> + <param name="gap_extension_sequencing_error_probability" value="20" /> + <param name="coefficient_for_modeling_homopolymer_errors" value="100" /> + <param name="perform_indel_calling_selector" value="perform_indel_calling" /> + <param name="skip_indel_calling_above_sample_depth" value="250" /> + <param name="gap_open_sequencing_error_probability" value="40" /> + <param name="platform_list_repeat" value="0" /> + <param name="advanced_options_selector" value="basic" /> + <output name="output_mpileup" ftype="bcf" file="samtools_mpileup_out_2.bcf" lines_diff="1" /> + <output name="output_log" file="samtools_mpileup_out_2.log" /> + </test> + <test> + <param name="reference_source_selector" value="cached" /> + <param name="input_bam" value="samtools_mpileup_in_3.bam" ftype="bam" dbkey="phiX" /> + <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" /> + <param name="gap_extension_sequencing_error_probability" value="20" /> + <param name="coefficient_for_modeling_homopolymer_errors" value="100" /> + <param name="perform_indel_calling_selector" value="perform_indel_calling" /> + <param name="skip_indel_calling_above_sample_depth" value="250" /> + <param name="gap_open_sequencing_error_probability" value="40" /> + <param name="platform_list_repeat" value="0" /> + <param name="advanced_options_selector" value="basic" /> + <output name="output_mpileup" ftype="bcf" file="samtools_mpileup_out_2.bcf" lines_diff="1" /> + <output name="output_log" file="samtools_mpileup_out_3.log" /> + </test> + <test> + <param name="reference_source_selector" value="cached" /> + <param name="input_bam" value="samtools_mpileup_in_1.bam" ftype="bam" dbkey="phiX" /> + <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" /> + <param name="gap_extension_sequencing_error_probability" value="20" /> + <param name="coefficient_for_modeling_homopolymer_errors" value="100" /> + <param name="perform_indel_calling_selector" value="perform_indel_calling" /> + <param name="skip_indel_calling_above_sample_depth" value="250" /> + <param name="gap_open_sequencing_error_probability" value="40" /> + <param name="platform_list_repeat" value="0" /> + <param name="advanced_options_selector" value="advanced" /> + <param name="advanced_options|filter_by_flags|filter_flags" value="nofilter" /> + <param name="advanced_options|limit_by_region|limit_by_regions" value="no_limit" /> + <param name="advanced_options|coefficient_for_downgrading" value="true" /> + <param name="advanced_options|max_reads_per_bam" value="200" /> + <param name="advanced_options|extended_BAQ_computation" value="true" /> + <param name="advanced_options|minimum_mapping_quality" value="0" /> + <param name="advanced_options|minimum_base_quality" value="43" /> + <output name="output_mpileup" ftype="bcf" file="samtools_mpileup_out_4.bcf" lines_diff="1" /> + <output name="output_log" file="samtools_mpileup_out_4.log" /> + </test> + </tests> + <help> **What it does** Generate BCF or pileup for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample. ------ -**Settings**:: +.. list-table:: **Input options** + :widths: 5 5 40 10 + :header-rows: 1 + + * - Flag + - Type + - Description + - Default + * - -6 + - *BOOLEAN* + - assume the quality is in the Illumina-1.3+ encoding + - off + * - -A + - *BOOLEAN* + - count anomalous read pairs + - off + * - -B + - *BOOLEAN* + - disable BAQ computation + - off + * - -b + - *FILE* + - list of input BAM filenames, one per line + - *null* + * - -C + - *INT* + - parameter for adjusting mapQ; 0 to disable + - 0 + * - -d + - *INT* + - max per-BAM depth to avoid excessive memory usage + - 250 + * - -E + - *BOOLEAN* + - recalculate extended BAQ on the fly thus ignoring existing BQs + - off + * - -f + - *FILE* + - faidx indexed reference sequence file + - *null* + * - -G + - *FILE* + - exclude read groups listed in FILE + - *null* + * - -l + - *FILE* + - list of positions (chr pos) or regions (BED) + - *null* + * - -M + - *INT* + - cap mapping quality at INT + - 60 + * - -r + - *STR* + - region in which pileup is generated + - *null* + * - -R + - *BOOLEAN* + - ignore RG tags + - off + * - -q + - *INT* + - skip alignments with mapQ smaller than INT + - 0 + * - -Q + - *INT* + - skip bases with baseQ/BAQ smaller than INT + - 13 + * - --rf + - *INT* + - required flags: skip reads with mask bits unset + - 0 + * - --ff + - *INT* + - filter flags: skip reads with mask bits set + - 0 + +------ - Input Options: - -6 Assume the quality is in the Illumina 1.3+ encoding. - -A Do not skip anomalous read pairs in variant calling. - -B Disable probabilistic realignment for the computation of base alignment quality (BAQ). BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments. - -b FILE List of input BAM files, one file per line [null] - -C INT Coefficient for downgrading mapping quality for reads containing excessive mismatches. Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50. [0] - -d INT At a position, read maximally INT reads per input BAM. [250] - -E Extended BAQ computation. This option helps sensitivity especially for MNPs, but may hurt specificity a little bit. - -f FILE The faidx-indexed reference file in the FASTA format. The file can be optionally compressed by razip. [null] - -l FILE BED or position list file containing a list of regions or sites where pileup or BCF should be generated [null] - -q INT Minimum mapping quality for an alignment to be used [0] - -Q INT Minimum base quality for a base to be considered [13] - -r STR Only generate pileup in region STR [all sites] - Output Options: - - -D Output per-sample read depth - -g Compute genotype likelihoods and output them in the binary call format (BCF). - -S Output per-sample Phred-scaled strand bias P-value - -u Similar to -g except that the output is uncompressed BCF, which is preferred for piping. - - Options for Genotype Likelihood Computation (for -g or -u): - - -e INT Phred-scaled gap extension sequencing error probability. Reducing INT leads to longer indels. [20] - -h INT Coefficient for modeling homopolymer errors. Given an l-long homopolymer run, the sequencing error of an indel of size s is modeled as INT*s/l. [100] - -I Do not perform INDEL calling - -L INT Skip INDEL calling if the average per-sample depth is above INT. [250] - -o INT Phred-scaled gap open sequencing error probability. Reducing INT leads to more indel calls. [40] - -P STR Comma dilimited list of platforms (determined by @RG-PL) from which indel candidates are obtained. It is recommended to collect indel candidates from sequencing technologies that have low indel error rate such as ILLUMINA. [all] +.. list-table:: **Output options** + :widths: 5 5 40 10 + :header-rows: 1 + + * - Flag + - Type + - Description + - Default + * - -D + - *BOOLEAN* + - output per-sample DP in BCF (require -g/-u) + - off + * - -g + - *BOOLEAN* + - generate BCF output (genotype likelihoods) + - off + * - -O + - *BOOLEAN* + - output base positions on reads (disabled by -g/-u) + - off + * - -s + - *BOOLEAN* + - output mapping quality (disabled by -g/-u) + - off + * - -S + - *BOOLEAN* + - output per-sample strand bias P-value in BCF (require -g/-u) + - off + * - -u + - *BOOLEAN* + - generate uncompressed BCF output + - off + +------ + +.. list-table:: **SNP/INDEL genotype likelihoods options (effective with '-g' or '-u')** + :widths: 5 5 40 10 + :header-rows: 1 + + * - Flag + - Type + - Description + - Default + * - -e + - *INT* + - Phred-scaled gap extension seq error probability + - 20 + * - -F + - *FLOAT* + - minimum fraction of gapped reads for candidates + - 0.002 + * - -h + - *INT* + - coefficient for homopolymer errors + - 100 + * - -I + - *BOOLEAN* + - do not perform indel calling + - off + * - -L + - *INT* + - max per-sample depth for INDEL calling + - 250 + * - -m + - *INT* + - minimum gapped reads for indel candidates + - 1 + * - -o + - *INT* + - Phred-scaled gap open sequencing error probability + - 40 + * - -p + - *BOOLEAN* + - apply -m and -F per-sample to increase sensitivity + - off + * - -P + - *STR* + - comma separated list of platforms for indels + - all ------ @@ -207,7 +495,7 @@ For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. <http://www.ncbi.nlm.nih.gov/pubmed/19505943>`_ + If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* - - </help> -</tool> + </help> +</tool> \ No newline at end of file
--- a/samtools_wrapper.py Wed Mar 12 12:52:52 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,110 +0,0 @@ -#!/usr/bin/env python -#Dan Blankenberg - -""" -A wrapper script for running SAMTools commands. -""" - -import sys, optparse, os, tempfile, subprocess, shutil -from string import Template - -GALAXY_EXT_TO_SAMTOOLS_EXT = { 'bam_index':'bam.bai', } #items not listed here will use the galaxy extension as-is -GALAXY_EXT_TO_SAMTOOLS_FILE_TYPE = GALAXY_EXT_TO_SAMTOOLS_EXT #for now, these are the same, but could be different if needed -DEFAULT_SAMTOOLS_PREFIX = "SAMTools_file" -CHUNK_SIZE = 2**20 #1mb - - -def cleanup_before_exit( tmp_dir ): - if tmp_dir and os.path.exists( tmp_dir ): - shutil.rmtree( tmp_dir ) - -def SAMTOOLS_filename_from_galaxy( galaxy_filename, galaxy_ext, target_dir = None, prefix = None ): - suffix = GALAXY_EXT_TO_SAMTOOLS_EXT.get( galaxy_ext, galaxy_ext ) - if prefix is None: - prefix = DEFAULT_SAMTOOLS_PREFIX - if target_dir is None: - target_dir = os.getcwd() - SAMTools_filename = os.path.join( target_dir, "%s.%s" % ( prefix, suffix ) ) - os.symlink( galaxy_filename, SAMTools_filename ) - return SAMTools_filename - -def SAMTOOLS_filetype_argument_substitution( argument, galaxy_ext ): - return argument % dict( file_type = GALAXY_EXT_TO_SAMTOOLS_FILE_TYPE.get( galaxy_ext, galaxy_ext ) ) - -def open_file_from_option( filename, mode = 'rb' ): - if filename: - return open( filename, mode = mode ) - return None - -def html_report_from_directory( html_out, dir ): - html_out.write( '<html>\n<head>\n<title>Galaxy - SAMTOOLS Output</title>\n</head>\n<body>\n<p/>\n<ul>\n' ) - for fname in sorted( os.listdir( dir ) ): - html_out.write( '<li><a href="%s">%s</a></li>\n' % ( fname, fname ) ) - html_out.write( '</ul>\n</body>\n</html>\n' ) - -def __main__(): - #Parse Command Line - parser = optparse.OptionParser() - parser.add_option( '-p', '--pass_through', dest='pass_through_options', action='append', type="string", help='These options are passed through directly to SAMTOOLS, without any modification.' ) - parser.add_option( '-d', '--dataset', dest='datasets', action='append', type="string", nargs=4, help='"-argument" "original_filename" "galaxy_filetype" "name_prefix"' ) - parser.add_option( '', '--stdout', dest='stdout', action='store', type="string", default=None, help='If specified, the output of stdout will be written to this file.' ) - parser.add_option( '', '--stderr', dest='stderr', action='store', type="string", default=None, help='If specified, the output of stderr will be written to this file.' ) - parser.add_option( '', '--html_report_from_directory', dest='html_report_from_directory', action='append', type="string", nargs=2, help='"Target HTML File" "Directory"') - (options, args) = parser.parse_args() - - tmp_dir = tempfile.mkdtemp( prefix='tmp-SAMTOOLS-' ) - - #set up stdout and stderr output options - stdout = open_file_from_option( options.stdout, mode = 'wb' ) - stderr = open_file_from_option( options.stderr, mode = 'wb' ) - #if no stderr file is specified, we'll use our own - if stderr is None: - stderr = tempfile.NamedTemporaryFile( prefix="SAMTOOLS-stderr-", dir=tmp_dir ) - - if options.pass_through_options: - cmd = ' '.join( options.pass_through_options ) - else: - cmd = '' - return_code = None - if options.datasets: - for ( dataset_arg, filename, galaxy_ext, prefix ) in options.datasets: - SAMTools_filename = SAMTOOLS_filename_from_galaxy( filename, galaxy_ext, target_dir = tmp_dir, prefix = prefix ) - if dataset_arg: - if '>' in cmd: - cmd = cmd.replace( '>', ' %s "%s" >' % ( SAMTOOLS_filetype_argument_substitution( dataset_arg, galaxy_ext ), SAMTools_filename ), 1 ) - else: - cmd = '%s %s "%s"' % ( cmd, SAMTOOLS_filetype_argument_substitution( dataset_arg, galaxy_ext ), SAMTools_filename ) - #auto index fasta files: - if galaxy_ext == 'fa': - index_cmd = 'samtools faidx %s' % ( SAMTools_filename ) - proc = subprocess.Popen( args=index_cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir ) - return_code = proc.wait() - if return_code: - break - if return_code is None or not return_code: - proc = subprocess.Popen( args=cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir ) - return_code = proc.wait() - if return_code: - stderr_target = sys.stderr - else: - if stdout: - stderr_target = stdout - else: - stderr_target = sys.stdout - stderr.flush() - stderr.seek(0) - while True: - chunk = stderr.read( CHUNK_SIZE ) - if chunk: - stderr_target.write( chunk ) - else: - break - stderr.close() - #generate html reports - if options.html_report_from_directory: - for ( html_filename, html_dir ) in options.html_report_from_directory: - html_report_from_directory( open( html_filename, 'wb' ), html_dir ) - - cleanup_before_exit( tmp_dir ) - -if __name__=="__main__": __main__()
--- a/test-data/samtools/mpileup/samtools_mpileup_out_1.log Wed Mar 12 12:52:52 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -[mpileup] 1 samples in 1 input files -<mpileup> Set max per-file depth to 8000
--- a/test-data/samtools/mpileup/samtools_mpileup_out_1.pileup Wed Mar 12 12:52:52 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,43 +0,0 @@ -phiX174 1411 A 1 ^P. $ -phiX174 1412 G 3 .^D.^F. "$$ -phiX174 1413 C 5 ...^D.^F. """$$ -phiX174 1414 G 6 .....^F. #####$ -phiX174 1415 C 7 ......^F. %%%%%%& -phiX174 1416 C 8 .......^F. $$$$$$$$ -phiX174 1417 G 9 ........^F. "#######$ -phiX174 1418 T 10 .........^F. """""""""$ -phiX174 1419 G 10 .......... """""'&'%$ -phiX174 1420 G 10 .......... """""""""" -phiX174 1421 A 10 .......... """""""""" -phiX174 1422 T 10 .......... """""""""" -phiX174 1423 G 10 .......... """""""""# -phiX174 1424 C 10 ..A.AAAAAA %""""""""" -phiX174 1425 C 10 .......... $$$""""""" -phiX174 1426 T 10 .......... #####""""" -phiX174 1427 G 10 .......... ######"""" -phiX174 1428 A 10 .......... """""""""" -phiX174 1429 C 10 .......... ((((((&("" -phiX174 1430 C 10 .......... $$$$$$$$$" -phiX174 1431 G 10 .......... ########## -phiX174 1432 T 10 .......... """""""""" -phiX174 1433 A 10 .......... ########## -phiX174 1434 C 10 .......... ((((((&(%$ -phiX174 1435 C 10 .......... $$$$$$$$$$ -phiX174 1436 G 10 .......... ########## -phiX174 1437 A 10 .......... """""""""! -phiX174 1438 G 10 .......... """""####! -phiX174 1439 G 10 .......... """""""""! -phiX174 1440 C 10 .......... """""""""! -phiX174 1441 T 10 .......... """"""""#! -phiX174 1442 A 10 .......... $$$%%%&&%! -phiX174 1443 A 10 .-1C.-1C..-1C...... """""""""! -phiX174 1444 C 10 **.*...... &%"!"""""! -phiX174 1445 C 10 .......... &%&!%%%&%! -phiX174 1446 C 10 .......... """!"""""! -phiX174 1447 T 10 .$..$....... #"#!"""""! -phiX174 1448 A 8 .$..$..... #!#%%$$! -phiX174 1449 A 6 .$.$.... !""""! -phiX174 1450 T 4 .$... """! -phiX174 1451 G 3 .$.. #"! -phiX174 1452 A 2 .$. "! -phiX174 1453 G 1 .$ !
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/samtools_mpileup_out_1.log Wed Mar 12 12:53:30 2014 -0400 @@ -0,0 +1,3 @@ +[fai_load] build FASTA index. +[mpileup] 1 samples in 1 input files +<mpileup> Set max per-file depth to 8000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/samtools_mpileup_out_1.pileup Wed Mar 12 12:53:30 2014 -0400 @@ -0,0 +1,43 @@ +phiX174 1411 A 0 P 1 +phiX174 1412 G 0 PDF 2,1,1 +phiX174 1413 C 0 PDFDF 3,2,2,1,1 +phiX174 1414 G 0 PDFDFF 4,3,3,2,2,1 +phiX174 1415 C 0 PDFDFFF 5,4,4,3,3,2,1 +phiX174 1416 C 0 PDFDFFFF 6,5,5,4,4,3,2,1 +phiX174 1417 G 0 PDFDFFFFF 7,6,6,5,5,4,3,2,1 +phiX174 1418 T 0 PDFDFFFFFF 8,7,7,6,6,5,4,3,2,1 +phiX174 1419 G 0 PDFDFFFFFF 9,8,8,7,7,6,5,4,3,2 +phiX174 1420 G 0 PDFDFFFFFF 10,9,9,8,8,7,6,5,4,3 +phiX174 1421 A 0 PDFDFFFFFF 11,10,10,9,9,8,7,6,5,4 +phiX174 1422 T 0 PDFDFFFFFF 12,11,11,10,10,9,8,7,6,5 +phiX174 1423 G 0 PDFDFFFFFF 13,12,12,11,11,10,9,8,7,6 +phiX174 1424 C 0 PDFDFFFFFF 14,13,13,12,12,11,10,9,8,7 +phiX174 1425 C 0 PDFDFFFFFF 15,14,14,13,13,12,11,10,9,8 +phiX174 1426 T 0 PDFDFFFFFF 16,15,15,14,14,13,12,11,10,9 +phiX174 1427 G 0 PDFDFFFFFF 17,16,16,15,15,14,13,12,11,10 +phiX174 1428 A 0 PDFDFFFFFF 18,17,17,16,16,15,14,13,12,11 +phiX174 1429 C 0 PDFDFFFFFF 19,18,18,17,17,16,15,14,13,12 +phiX174 1430 C 0 PDFDFFFFFF 20,19,19,18,18,17,16,15,14,13 +phiX174 1431 G 0 PDFDFFFFFF 21,20,20,19,19,18,17,16,15,14 +phiX174 1432 T 0 PDFDFFFFFF 22,21,21,20,20,19,18,17,16,15 +phiX174 1433 A 0 PDFDFFFFFF 23,22,22,21,21,20,19,18,17,16 +phiX174 1434 C 0 PDFDFFFFFF 24,23,23,22,22,21,20,19,18,17 +phiX174 1435 C 0 PDFDFFFFFF 25,24,24,23,23,22,21,20,19,18 +phiX174 1436 G 0 PDFDFFFFFF 26,25,25,24,24,23,22,21,20,19 +phiX174 1437 A 0 PDFDFFFFFF 27,26,26,25,25,24,23,22,21,20 +phiX174 1438 G 0 PDFDFFFFFF 28,27,27,26,26,25,24,23,22,21 +phiX174 1439 G 0 PDFDFFFFFF 29,28,28,27,27,26,25,24,23,22 +phiX174 1440 C 0 PDFDFFFFFF 30,29,29,28,28,27,26,25,24,23 +phiX174 1441 T 0 PDFDFFFFFF 31,30,30,29,29,28,27,26,25,24 +phiX174 1442 A 0 PDFDFFFFFF 32,31,31,30,30,29,28,27,26,25 +phiX174 1443 A 0 PDFDFFFFFF 33,32,32,31,31,30,29,28,27,26 +phiX174 1444 C 0 PDFDFFFFFF 34,33,33,32,32,31,30,29,28,27 +phiX174 1445 C 0 PDFDFFFFFF 34,33,34,32,33,32,31,30,29,28 +phiX174 1446 C 0 PDFDFFFFFF 35,34,35,33,34,33,32,31,30,29 +phiX174 1447 T 0 PDFDFFFFFF 36,35,36,34,35,34,33,32,31,30 +phiX174 1448 A 0 DDFFFFFF 36,35,36,35,34,33,32,31 +phiX174 1449 A 0 DFFFFF 36,36,35,34,33,32 +phiX174 1450 T 0 FFFF 36,35,34,33 +phiX174 1451 G 0 FFF 36,35,34 +phiX174 1452 A 0 FF 36,35 +phiX174 1453 G 0 F 36
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/samtools_mpileup_out_2.bcf Wed Mar 12 12:53:30 2014 -0400 @@ -0,0 +1,33 @@ +##fileformat=VCFv4.1 +##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth"> +##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases"> +##INFO=<ID=MQ,Number=1,Type=Integer,Description="Root-mean-square mapping quality of covering reads"> +##INFO=<ID=FQ,Number=1,Type=Float,Description="Phred probability of all samples being the same"> +##INFO=<ID=AF1,Number=1,Type=Float,Description="Max-likelihood estimate of the first ALT allele frequency (assuming HWE)"> +##INFO=<ID=AC1,Number=1,Type=Float,Description="Max-likelihood estimate of the first ALT allele count (no HWE assumption)"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=IS,Number=2,Type=Float,Description="Maximum number of reads supporting an indel and fraction of indel reads"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes for each ALT allele, in the same order as listed"> +##INFO=<ID=G3,Number=3,Type=Float,Description="ML estimate of genotype frequencies"> +##INFO=<ID=HWE,Number=1,Type=Float,Description="Chi^2 based HWE test P-value based on G3"> +##INFO=<ID=CLR,Number=1,Type=Integer,Description="Log ratio of genotype likelihoods with and without the constraint"> +##INFO=<ID=UGT,Number=1,Type=String,Description="The most probable unconstrained genotype configuration in the trio"> +##INFO=<ID=CGT,Number=1,Type=String,Description="The most probable constrained genotype configuration in the trio"> +##INFO=<ID=PV4,Number=4,Type=Float,Description="P-values for strand bias, baseQ bias, mapQ bias and tail distance bias"> +##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL."> +##INFO=<ID=PC2,Number=2,Type=Integer,Description="Phred probability of the nonRef allele frequency in group1 samples being larger (,smaller) than in group2."> +##INFO=<ID=PCHI2,Number=1,Type=Float,Description="Posterior weighted chi^2 P-value for testing the association between group1 and group2 samples."> +##INFO=<ID=QCHI2,Number=1,Type=Integer,Description="Phred scaled PCHI2."> +##INFO=<ID=PR,Number=1,Type=Integer,Description="# permutations yielding a smaller PCHI2."> +##INFO=<ID=QBD,Number=1,Type=Float,Description="Quality by Depth: QUAL/#reads"> +##INFO=<ID=RPB,Number=1,Type=Float,Description="Read Position Bias"> +##INFO=<ID=MDV,Number=1,Type=Integer,Description="Maximum number of high-quality nonRef reads in samples"> +##INFO=<ID=VDB,Number=1,Type=Float,Description="Variant Distance Bias (v2) for filtering splice-site artefacts in RNA-seq data. Note: this version may be broken."> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> +##FORMAT=<ID=GL,Number=3,Type=Float,Description="Likelihoods for RR,RA,AA genotypes (R=ref,A=alt)"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="# high-quality bases"> +##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality non-reference bases"> +##FORMAT=<ID=SP,Number=1,Type=Integer,Description="Phred-scaled strand bias P-value"> +##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/samtools_mpileup_out_2.log Wed Mar 12 12:53:30 2014 -0400 @@ -0,0 +1,3 @@ +[fai_load] build FASTA index. +[mpileup] 1 samples in 1 input files +<mpileup> Set max per-file depth to 8000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/samtools_mpileup_out_3.log Wed Mar 12 12:53:30 2014 -0400 @@ -0,0 +1,2 @@ +[mpileup] 1 samples in 1 input files +<mpileup> Set max per-file depth to 8000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/samtools_mpileup_out_4.bcf Wed Mar 12 12:53:30 2014 -0400 @@ -0,0 +1,33 @@ +##fileformat=VCFv4.1 +##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth"> +##INFO=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases"> +##INFO=<ID=MQ,Number=1,Type=Integer,Description="Root-mean-square mapping quality of covering reads"> +##INFO=<ID=FQ,Number=1,Type=Float,Description="Phred probability of all samples being the same"> +##INFO=<ID=AF1,Number=1,Type=Float,Description="Max-likelihood estimate of the first ALT allele frequency (assuming HWE)"> +##INFO=<ID=AC1,Number=1,Type=Float,Description="Max-likelihood estimate of the first ALT allele count (no HWE assumption)"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=IS,Number=2,Type=Float,Description="Maximum number of reads supporting an indel and fraction of indel reads"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes for each ALT allele, in the same order as listed"> +##INFO=<ID=G3,Number=3,Type=Float,Description="ML estimate of genotype frequencies"> +##INFO=<ID=HWE,Number=1,Type=Float,Description="Chi^2 based HWE test P-value based on G3"> +##INFO=<ID=CLR,Number=1,Type=Integer,Description="Log ratio of genotype likelihoods with and without the constraint"> +##INFO=<ID=UGT,Number=1,Type=String,Description="The most probable unconstrained genotype configuration in the trio"> +##INFO=<ID=CGT,Number=1,Type=String,Description="The most probable constrained genotype configuration in the trio"> +##INFO=<ID=PV4,Number=4,Type=Float,Description="P-values for strand bias, baseQ bias, mapQ bias and tail distance bias"> +##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL."> +##INFO=<ID=PC2,Number=2,Type=Integer,Description="Phred probability of the nonRef allele frequency in group1 samples being larger (,smaller) than in group2."> +##INFO=<ID=PCHI2,Number=1,Type=Float,Description="Posterior weighted chi^2 P-value for testing the association between group1 and group2 samples."> +##INFO=<ID=QCHI2,Number=1,Type=Integer,Description="Phred scaled PCHI2."> +##INFO=<ID=PR,Number=1,Type=Integer,Description="# permutations yielding a smaller PCHI2."> +##INFO=<ID=QBD,Number=1,Type=Float,Description="Quality by Depth: QUAL/#reads"> +##INFO=<ID=RPB,Number=1,Type=Float,Description="Read Position Bias"> +##INFO=<ID=MDV,Number=1,Type=Integer,Description="Maximum number of high-quality nonRef reads in samples"> +##INFO=<ID=VDB,Number=1,Type=Float,Description="Variant Distance Bias (v2) for filtering splice-site artefacts in RNA-seq data. Note: this version may be broken."> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> +##FORMAT=<ID=GL,Number=3,Type=Float,Description="Likelihoods for RR,RA,AA genotypes (R=ref,A=alt)"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="# high-quality bases"> +##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality non-reference bases"> +##FORMAT=<ID=SP,Number=1,Type=Integer,Description="Phred-scaled strand bias P-value"> +##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT
--- a/tool-data/sam_fa_indices.loc.sample Wed Mar 12 12:52:52 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -#This is a sample file distributed with Galaxy that enables tools -#to use a directory of Samtools indexed sequences data files. You will need -#to create these data files and then create a sam_fa_indices.loc file -#similar to this one (store it in this directory) that points to -#the directories in which those files are stored. The sam_fa_indices.loc -#file has this format (white space characters are TAB characters): -# -#index <seq> <location> -# -#So, for example, if you had hg18 indexed stored in -#/depot/data2/galaxy/sam/, -#then the sam_fa_indices.loc entry would look like this: -# -#index hg18 /depot/data2/galaxy/sam/hg18.fa -# -#and your /depot/data2/galaxy/sam/ directory -#would contain hg18.fa and hg18.fa.fai files: -# -#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.fa -#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.fa.fai -# -#Your sam_fa_indices.loc file should include an entry per line for -#each index set you have stored. The file in the path does actually -#exist, but it should never be directly used. Instead, the name serves -#as a prefix for the index file. For example: -# -#index hg18 /depot/data2/galaxy/sam/hg18.fa -#index hg19 /depot/data2/galaxy/sam/hg19.fa
--- a/tool-data/tool_data_table_conf.xml.sample Wed Mar 12 12:52:52 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> -<tables> - <!-- Location of SAMTools indexes and other files --> - <table name="sam_fa_indexes" comment_char="#"> - <columns>line_type, value, path</columns> - <file path="tool-data/sam_fa_indices.loc" /> - </table> -</tables> \ No newline at end of file
--- a/tool_dependencies.xml Wed Mar 12 12:52:52 2014 -0400 +++ b/tool_dependencies.xml Wed Mar 12 12:53:30 2014 -0400 @@ -1,6 +1,6 @@ <?xml version="1.0"?> <tool_dependency> - <package name="samtools" version="0.1.18"> - <repository changeset_revision="c0f72bdba484" name="package_samtools_0_1_18" owner="devteam" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <package name="samtools" version="0.1.19"> + <repository changeset_revision="233326db3402" name="package_samtools_0_1_19" owner="devteam" toolshed="http://testtoolshed.g2.bx.psu.edu" /> </package> </tool_dependency>