Mercurial > repos > yhoogstrate > varscan_mpileup2snp_from_bam
changeset 22:009759696705
Deleted selected files
author | yhoogstrate |
---|---|
date | Fri, 07 Feb 2014 03:37:22 -0500 |
parents | 798678ae11d8 |
children | 51fa0e2620f7 |
files | all_fasta.loc.sample tool_dependencies.xml varscan2_from_bam.xml |
diffstat | 3 files changed, 0 insertions(+), 229 deletions(-) [+] |
line wrap: on
line diff
--- a/all_fasta.loc.sample Thu Jan 30 08:37:48 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,18 +0,0 @@ -#This file lists the locations and dbkeys of all the fasta files -#under the "genome" directory (a directory that contains a directory -#for each build). The script extract_fasta.py will generate the file -#all_fasta.loc. This file has the format (white space characters are -#TAB characters): -# -#<unique_build_id> <dbkey> <display_name> <file_path> -# -#So, all_fasta.loc could look something like this: -# -#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa -#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa -#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa -# -#Your all_fasta.loc file should contain an entry for each individual -#fasta file. So there will be multiple fasta files for each build, -#such as with hg19 above. -#
--- a/tool_dependencies.xml Thu Jan 30 08:37:48 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="samtools-mpileup-parallel" version="1.0.19"> - <install version="1.0"> - <actions> - <action type="shell_command">svn checkout https://github.com/mydatascience/parallel-mpileup/trunk samtools-mpileup-parallel && cd samtools-mpileup-parallel && cd $(ls |grep samtools-) && make && cp samtools ../samtools-mpileup-parallel</action> - <action type="move_file"> - <source>samtools-mpileup-parallel</source> - <destination>$INSTALL_DIR/bin</destination> - </action> - <action type="set_environment"> - <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable> - <environment_variable name="PATH" action="prepend_to">$REPOSITORY_INSTALL_DIR</environment_variable> - </action> - </actions> - </install> - <readme> - Downloads and installs a modified version of samtools, able to paralellize the mpileup function. - </readme> - </package> - <package name="VarScan" version="2.3.6"> - <install version="1.0"> - <actions> - <action type="shell_command">wget http://downloads.sourceforge.net/project/varscan/VarScan.v2.3.6.jar --output-file="VarScan.v2.3.6.jar"</action> - <action type="move_file"> - <source>../VarScan.v2.3.6.jar</source> - <destination>$INSTALL_DIR/bin</destination> - </action> - <action type="set_environment"> - <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable> - <environment_variable name="PATH" action="prepend_to">$REPOSITORY_INSTALL_DIR</environment_variable> - </action> - </actions> - </install> - <readme> - Downloads VarScan2. - </readme> - </package> -</tool_dependency>
--- a/varscan2_from_bam.xml Thu Jan 30 08:37:48 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,172 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<tool id="varscan2_snp_from_bam" name="VarScan2 from BAM"> - <description>VarScan2 SNP detection (1: directly reading a *.bam file, 2: using parallel mpileup generation, to avoid unncessairy I/O overhead.</description> - <requirements> - <requirement type="package" version="1.0.19">samtools-mpileup-parallel</requirement> - <requirement type="package" version="2.3.6">VarScan</requirement> - </requirements> - <command> - samtools-mpileup-parallel mpileup - -t $samtools_threads - -f - #if $reference_genome_source.source_select=="database" - $reference_genome_source.reference_genome - #else - $reference_genome_source.reference_genome - #end if - - #if $extended_parameters_regions.samtools_regions == "region" - -r $extended_parameters_regions.$samtools_r - #elif $extended_parameters_regions.samtools_regions == "regions_file_pos" or $extended_parameters_regions.samtools_regions == "regions_file_bed" - -l $extended_parameters_regions.$samtools_l - #end if - - #if $extended_parameters.parameters == "extended" - $extended_parameters.samtools_6 - $extended_parameters.samtools_A - $extended_parameters.samtools_B - -C $extended_parameters.samtools_C - -d $extended_parameters.samtools_d - $extended_parameters.samtools_E - -M $extended_parameters.samtools_M - $extended_parameters.samtools_R - -q $extended_parameters.samtools_q - -Q $extended_parameters.samtools_Q - - -e $extended_parameters.samtools_e - -F $extended_parameters.samtools_F - -h $extended_parameters.samtools_h - $extended_parameters.samtools_I - -L $extended_parameters.samtools_L - -m $extended_parameters.samtools_m - -o $extended_parameters.samtools_o - $extended_parameters.samtools_p - -P $extended_parameters.samtools_P - #end if - - #for $alignment in $alignments - ${alignment} - #end for - - | java - -Xmx64G - -jar VarScan.v2.3.6.jar - mpileup2snp - - #if $extended_parameters.parameters == "extended" - --min-coverage $varscan_min_coverage - --min-reads2 $varscan_min_reads2 - --min-avg-qual $varscan_min_avg_qual - --min-var-freq $varscan_min_var_freq - --min-freq-for-hom $varscan_min_freq_for_hom - --p-value $varscan_p_value - $varscan_strand_filter - $varscan_output_vcf - $varscan_variants - #end if - - $varscan_output_vcf - - > $output_table - </command> - - <inputs> - <param format="bam,sam" multiple="true" name="alignments" type="data" label="Alignment file" help="Mapped reads in BAM or SAM format."/> - - <!-- Find out how to access the reference genome from the BAM file(s) --> - <conditional name="reference_genome_source"> - <param name="source_select" type="select" label="Fasta Source"> - <option value="cached" selected="true">Locally Cached Alignments</option> - <option value="user">Alignments in Your History</option> - </param> - <when value="user"> - <param name="reference_genome" format="fasta" type="data" label="Reference Genome used during alignment (fasta)" help="Reference genome (genome.fa) that corresponds to the *.bam file." /> - </when> - <when value="cached"> - <param name="reference_genome" type="select" label="Reference Genome used during alignment (fasta)" > - <options from_file="all_fasta.loc"> - <column name="name" index="0"/> - <column name="dbkey" index="1"/> - <column name="value" index="3"/> - <filter type="data_meta" ref="alignments" multiple="true" key="dbkey" column="1" /> - </options> - </param> - </when> - </conditional> - - <conditional name="extended_parameters_regions"> - <param name="samtools_regions" type="select" label="VarScan parameters" help="For more advanced VarScan settings."> - <option value="entire_genome">Entire genome</option> - <option value="region">Specific region</option> - <option value="regions_file_pos">Specific positions (file); list of positions</option> - <option value="regions_file_bed">Specific regions (file); list of regions in BED</option> - </param> - <when value="entire_genome"> - </when> - <when value="region"> - <param type="text" name="samtools_r" label="Samtools: region in which pileup is generated" help="chr:pos or chr:start-end" /> - </when> - <when value="regions_file_pos"> - <param type="data" name="samtools_l" format="tabular" label="Samtools: list of positions (chr pos)" /> - </when> - <when value="regions_file_bed"> - <param type="data" name="samtools_l" format="bed" label="Samtools: specific regions (BED)" /> - </when> - </conditional> - - <param type="integer" name="samtools_threads" value="8" min="1" label="Samtools: mpileup threads" /> - - <conditional name="extended_parameters"> - <param name="parameters" type="select" label="VarScan parameters" help="For more advanced VarScan settings."> - <option value="default">Default settings</option> - <option value="extended">Extended settings</option> - </param> - <when value="default"> - </when> - <when value="extended"> - <param type="boolean" name="samtools_6" falsevalue="" truevalue=" -6" label="Samtools: assume the quality is in the Illumina-1.3+ encoding" /> - <param type="boolean" name="samtools_A" falsevalue="" truevalue=" -A" label="Samtools: count anomalous read pairs" /> - <param type="boolean" name="samtools_B" falsevalue="" truevalue=" -B" label="Samtools: disable BAQ computation" /> - <param type="integer" name="samtools_C" value="0" label="Samtools: parameter for adjusting mapQ; 0 to disable [0]" /> - <param type="integer" name="samtools_d" value="250" label="Samtools: max per-BAM depth to avoid excessive memory usage [250]" /> - <param type="boolean" name="samtools_E" falsevalue="" truevalue=" -E" label="Samtools: recalculate extended BAQ on the fly thus ignoring existing BQs" /> - <param type="integer" name="samtools_M" value="60" label="cap mapping quality at INT [60]" /> - <param type="boolean" name="samtools_R" falsevalue="" truevalue=" -R" label="Samtools: ignore RG tags" /> - <param type="integer" name="samtools_q" value="0" label="Samtools: skip alignments with mapQ smaller than INT [0]" /> - <param type="integer" name="samtools_Q" value="13" label="Samtools: skip bases with baseQ/BAQ smaller than INT [13]" /> - - <param type="integer" name="samtools_e" value="20" label="Samtools: Phred-scaled gap extension seq error probability [20]" /> - <param type="float" name="samtools_F" value="0.002" label="Samtools: minimum fraction of gapped reads for candidates [0.002]" help="Alias: -F" /> - <param type="integer" name="samtools_h" value="100" label="Samtools: coefficient for homopolymer errors [100]" /> - <param type="boolean" name="samtools_I" falsevalue="" truevalue=" -I" label="Samtools: do not perform indel calling" /> - <param type="integer" name="samtools_L" value="250" label="Samtools: max per-sample depth for INDEL calling [250]" /> - <param type="integer" name="samtools_m" value="1" label="Samtools: minimum gapped reads for indel candidates [1]" help="Alias: -m" /> - <param type="integer" name="samtools_o" value="40" label="Samtools: Phred-scaled gap open sequencing error probability [40]" /> - <param type="boolean" name="samtools_p" falsevalue="" truevalue=" -p" label="Samtools: apply -m and -F per-sample to increase sensitivity" /> - <param type="text" name="samtools_P" value="all" label="Samtools: comma separated list of platforms for indels [all]" /> - - <param type="integer" name="varscan_min_coverage" value="8" label="VarScan: Minimum read depth at a position to make a call [8]" /> - <param type="integer" name="varscan_min_reads2" value="2" label="VarScan: PMinimum supporting reads at a position to call variants [2]" /> - <param type="integer" name="varscan_min_avg_qual" value="15" label="VarScan: Minimum base quality at a position to count a read [15]" /> - <param type="float" name="varscan_min_var_freq" value="0.01" label="VarScan: minimum fraction of gapped reads for candidates [0.002]" help="Alias: -F" /> - <param type="float" name="varscan_min_freq_for_hom" value="0.75" label="VarScan: Minimum frequency to call homozygote [0.75]" /> - <param type="float" name="varscan_p_value" value="0.99" label="VarScan: Default p-value threshold for calling variants [99e-02]" /> - <param type="boolean" name="varscan_strand_filter" falsevalue=" --strand_filter 0" truevalue=" --strand_filter 1" checked="true" label="VarScan: Ignore variants with >90% support on one strand [1]" /> - <param type="boolean" name="varscan_variants" falsevalue=" --variants 0" truevalue=" --variants 1" label="VarScan: Report only variant (SNP/indel) positions [0]" /> - </when> - </conditional> - - <param type="boolean" name="varscan_output_vcf" falsevalue=" --output-vcf 0" truevalue=" --output-vcf 1" label="VarScan: If set to 1, outputs in VCF format" /> - </inputs> - - <outputs> - <data format="tabular" name="output_table" label="${tool.name} on ${', '.join([ str(a.hid)+': '+a.name for a in $alignments ])}" /> - </outputs> - - <help> - VarScan2.3.6 - ------------ - - Make sure your reference genomes are properly annotated in "tool-data/all_fasta.loc", and linked to the names of the reference used for alignment. - </help> -</tool>