Mercurial > repos > devteam > realigner_target_creator
diff realigner_target_creator.xml @ 0:751f4d177d8e
Imported from capsule None
author | devteam |
---|---|
date | Tue, 01 Apr 2014 09:12:09 -0400 |
parents | |
children | f202477a521b |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/realigner_target_creator.xml Tue Apr 01 09:12:09 2014 -0400 @@ -0,0 +1,165 @@ +<tool id="gatk_realigner_target_creator" name="Realigner Target Creator" version="0.0.4"> + <description>for use in local realignment</description> + <requirements> + <requirement type="package" version="1.4">gatk</requirement> + <requirement type="package" version="0.1.18">samtools</requirement> + </requirements> + <macros> + <import>gatk_macros.xml</import> + </macros> + <command interpreter="python">gatk_wrapper.py + --max_jvm_heap_fraction "1" + --stdout "${output_log}" + -d "-I" "${reference_source.input_bam}" "${reference_source.input_bam.ext}" "gatk_input" + #if str( $reference_source.input_bam.metadata.bam_index ) != "None": + -d "" "${reference_source.input_bam.metadata.bam_index}" "bam_index" "gatk_input" ##hardcode galaxy ext type as bam_index + #end if + -p 'java + -jar "${JAVA_JAR_PATH}/GenomeAnalysisTK.jar" + -T "RealignerTargetCreator" + -o "${output_interval}" + -et "NO_ET" ##ET no phone home + --num_threads \${GALAXY_SLOTS:-4} + ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout + #if $reference_source.reference_source_selector != "history": + -R "${reference_source.ref_file.fields.path}" + #end if + ' + #set $rod_binding_names = dict() + #for $rod_binding in $rod_bind: + #if str( $rod_binding.rod_bind_type.rod_bind_type_selector ) == 'custom': + #set $rod_bind_name = $rod_binding.rod_bind_type.custom_rod_name + #else + #set $rod_bind_name = $rod_binding.rod_bind_type.rod_bind_type_selector + #end if + #set $rod_binding_names[$rod_bind_name] = $rod_binding_names.get( $rod_bind_name, -1 ) + 1 + -d "-known:${rod_bind_name},%(file_type)s" "${rod_binding.rod_bind_type.input_rod}" "${rod_binding.rod_bind_type.input_rod.ext}" "input_${rod_bind_name}_${rod_binding_names[$rod_bind_name]}" + #end for + + #include source=$standard_gatk_options# + ##start analysis specific options + #if $analysis_param_type.analysis_param_type_selector == "advanced": + -p ' + --minReadsAtLocus "${analysis_param_type.minReadsAtLocus}" + --windowSize "${analysis_param_type.windowSize}" + --mismatchFraction "${analysis_param_type.mismatchFraction}" + --maxIntervalSize "${analysis_param_type.maxIntervalSize}" + ' + #end if + </command> + <inputs> + <conditional name="reference_source"> + <expand macro="reference_source_selector_param" /> + <when value="cached"> + <param name="input_bam" type="data" format="bam" label="BAM file" help="-I,--input_file &lt;input_file&gt;"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_data_table" table_name="gatk_picard_indexes" metadata_name="dbkey" metadata_column="dbkey" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select --> + </param> + <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &lt;reference_sequence&gt;" > + <options from_data_table="gatk_picard_indexes"> + <filter type="data_meta" key="dbkey" ref="input_bam" column="dbkey"/> + </options> + <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> + </param> + </when> + <when value="history"> + <param name="input_bam" type="data" format="bam" label="BAM file" help="-I,--input_file &lt;input_file&gt;" /> + <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &lt;reference_sequence&gt;"> + <options> + <filter type="data_meta" key="dbkey" ref="input_bam" /> + </options> + </param> + </when> + </conditional> + + <repeat name="rod_bind" title="Binding for reference-ordered data" help="-known,--known &lt;known&gt;"> + <conditional name="rod_bind_type"> + <param name="rod_bind_type_selector" type="select" label="Binding Type"> + <option value="dbsnp" selected="True">dbSNP</option> + <option value="snps">SNPs</option> + <option value="indels">INDELs</option> + <option value="custom">Custom</option> + </param> + <when value="dbsnp"> + <param name="input_rod" type="data" format="vcf" label="ROD file" /> + </when> + <when value="snps"> + <param name="input_rod" type="data" format="vcf" label="ROD file" /> + </when> + <when value="indels"> + <param name="input_rod" type="data" format="vcf" label="ROD file" /> + </when> + <when value="custom"> + <param name="custom_rod_name" type="text" value="Unknown" label="ROD Name"/> + <param name="input_rod" type="data" format="vcf" label="ROD file" /> + </when> + </conditional> + </repeat> + + <expand macro="gatk_param_type_conditional" /> + + <expand macro="analysis_type_conditional"> + <param name="windowSize" type="integer" value="10" label="Window size for calculating entropy or SNP clusters (windowSize)" help="-window,--windowSize &lt;windowSize&gt;" /> + <param name="mismatchFraction" type="float" value="0.15" label="Fraction of base qualities needing to mismatch for a position to have high entropy (mismatchFraction)" help="to disable set to <= 0 or > 1 (-mismatch,--mismatchFraction &lt;mismatchFraction&gt;)"/> + <param name="minReadsAtLocus" type="integer" value="4" label="Minimum reads at a locus to enable using the entropy calculation (minReadsAtLocus)" help="-minReads,--minReadsAtLocus &lt;minReadsAtLocus&gt;" /> + <param name="maxIntervalSize" type="integer" value="500" label="Maximum interval size" help="-maxInterval,--maxIntervalSize &lt;maxIntervalSize&gt;" /> + </expand> + </inputs> + <outputs> + <data format="gatk_interval" name="output_interval" label="${tool.name} on ${on_string} (GATK intervals)" /> + <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" /> + </outputs> + <tests> + <test> + <param name="reference_source_selector" value="history" /> + <param name="ref_file" value="phiX.fasta" ftype="fasta" /> + <param name="input_bam" value="gatk/fake_phiX_reads_1.bam" ftype="bam" /> + <param name="rod_bind_type_selector" value="dbsnp" /> + <param name="input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" /> + <param name="gatk_param_type_selector" value="basic" /> + <param name="analysis_param_type_selector" value="advanced" /> + <param name="windowSize" value="10" /> + <param name="mismatchFraction" value="0.15" /> + <param name="minReadsAtLocus" value="4" /> + <param name="maxIntervalSize" value="500" /> + <output name="output_interval" file="gatk/gatk_realigner_target_creator/gatk_realigner_target_creator_out_1.gatk_interval" /> + <output name="output_log" file="gatk/gatk_realigner_target_creator/gatk_realigner_target_creator_out_1.log.contains" compare="contains"/> + </test> + </tests> + <help> +**What it does** + +Emits intervals for the Local Indel Realigner to target for cleaning. Ignores 454 reads, MQ0 reads, and reads with consecutive indel operators in the CIGAR string. + +For more information on local realignment around indels using the GATK, see this `tool specific page <http://www.broadinstitute.org/gsa/wiki/index.php/Local_realignment_around_indels>`_. + +To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_. + +If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_. + +------ + +**Inputs** + +GenomeAnalysisTK: RealignerTargetCreator accepts an aligned BAM input file. + + +**Outputs** + +The output is in GATK Interval format. + + +Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats. + +------- + +**Settings**:: + + windowSize window size for calculating entropy or SNP clusters + mismatchFraction fraction of base qualities needing to mismatch for a position to have high entropy; to disable set to <= 0 or > 1 + minReadsAtLocus minimum reads at a locus to enable using the entropy calculation + maxIntervalSize maximum interval size + +@CITATION_SECTION@ + </help> +</tool>