Mercurial > repos > bgruening > footprint
diff footprint.xml @ 0:eefd8499c801 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/footprint commit 3a110c632920d1ed94b78053184978040df3edaf
author | bgruening |
---|---|
date | Tue, 02 May 2017 11:36:04 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/footprint.xml Tue May 02 11:36:04 2017 -0400 @@ -0,0 +1,212 @@ +<?xml version="1.0" encoding="UTF-8"?> +<tool id="footprint" name="footprint" version="1.0.0"> + <requirements> + <requirement type="package" version="1.0.0">footprint</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + </stdio> + <command><![CDATA[ + ln -s '$bam_file' ./bam_file.bam + + && + + find_footprints.sh + + ./bam_file.bam + + '$chrom_sizes' + + '$motif_coords' + + ## genome source + #if $refGenomeSource.genomeSource == "history": + '$refGenomeSource.ownFile' + #else + '$refGenomeSource.builtin.fields.path' + #end if + + $factor_name + + '$bias_file' + + '$peak_file' + + $no_of_components + + $background + + $fixed_bg + + && + mv *.PARAM PARAM + && + mv *.RESULTS RESULTS + && + mv *.plot2.png plot2.png + && + mv *.plot1.png plot1.png +]]> + </command> + <inputs> + <param name="bam_file" type="data" format="BAM" label="alignment bam file" help="" /> + <param name="chrom_sizes" type="data" format="tablular" label="chromosome length" help="" /> + <param name="motif_coords" type="data" format="BED" label="coordinates of motif" help="" /> + <conditional name="refGenomeSource"> + <param name="genomeSource" type="select" + label="Will you select a reference genome from your + history or use a built-in genome?" + help="The version of genome against which the reads were aligned."> + <option value="fai" selected="True"> + Use a built-in genome</option> + <option value="history"> + Use a genome from my current history</option> + </param> + <when value="fai"> + <param name="builtin" type="select" + label="Select a reference genome"> + <options from_data_table="sam_fa_indices"> + <filter type="sort_by" column="1" /> + <validator type="no_options" + message="A built-in reference genome is not available + for the build associated with the selected input file"/> + </options> + </param> + </when> + <when value="history"> + <param name="ownFile" type="data" format="fasta" + label="Select the reference genome" help="Genome sequences in FASTA format" /> + </when> + </conditional> + <param name="factor_name" type="text" label="transcription factor" help="e.g. CTCF" /> + <param name="bias_file" type="data" format="tabular,txt" label="cleavage/transposition bias" help="" /> + <param name="peak_file" type="data" format="tabular" label="coordinates of ChIP-seq peaks" help="" /> + <param name="no_of_components" type="select" label="number of components"> + <option value="2" selected="true">2</option> + <option value="3">3</option> + </param> + <param name="background" type="select" label="background components"> + <option value="Seq" selected="true">Seq</option> + <option value="Flat">Flat</option> + </param> + <param name="fixed_bg" type="select" label="fixed background component"> + <option value="TRUE" selected="true">TRUE</option> + <option value="FALSE">FALSE</option> + </param> + </inputs> + <outputs> + <data name="RESULTS" format="tabular" from_work_dir="RESULTS" label="${tool.name} on ${on_string}: results" /> + <data name="PARAM" format="txt" from_work_dir="PARAM" label="${tool.name} on ${on_string}: parameters" /> + <data name="plot1" format="png" from_work_dir="plot1.png" label="${tool.name} on ${on_string}: plot 1" /> + <data name="plot2" format="png" from_work_dir="plot2.png" label="${tool.name} on ${on_string}: plot 2" /> + </outputs> + <tests> + <test> + <param name="bam_file" value="input_ATAC_HEK293_hg19_chr1.bam" /> + <param name="chrom_sizes" value="input_hg19.chr1.chrom.size" /> + <param name="motif_coords" value="input_CTCF_motifs_hg19_chr1.bed" /> + <param name="genomeSource" value="history" /> + <param name="ownFile" value="input_hg19_chr1.fa" /> + <param name="factor_name" value="CTCF" /> + <param name="bias_file" value="input_SeqBias_ATAC.txt" /> + <param name="peak_file" value="input_CTCF_HEK293_chip_hg19_chr1.bed" /> + <param name="no_of_components" value="2" /> + <param name="background" value="Seq" /> + <param name="fixed_bg" value="TRUE" /> + <output name="RESULTS" file="output.RESULTS" ftype="tabular" compare="sim_size"/> + <output name="PARAM" file="output.PARAM" ftype="txt" compare="sim_size"/> + <output name="plot1" file="output_plot1.png" ftype="png" compare="sim_size" delta="15000" /> + <output name="plot2" file="output_plot2.png" ftype="png" compare="sim_size" delta="15000" /> + </test> + </tests> + <help><![CDATA[.. class:: infomark + +**Purpose** + +This is a pipeline to find transcription factor footprints in ATAC-seq or DNase-seq data. + + +----- + +.. class:: infomark + +**Inputs** + +alignment bam file + * A bam file from the ATAC-seq or DNase-seq experiment. + +chromosome length + * A tab delimited file with 2 columns. + * The first column is the chromosome name and the second column is the chromosome length for the appropriate organism and genome build. + * Example: chr1 10000000 + +coordinates of motif + * A 6-column bed file with the coordinates of motif matches (eg resulting from scanning the genome with a PWM) for the transcription factor of interest. + * The 6 columns should contain chromosome, start coordinate, end coordinate, name, score and strand information in this order. The coordinates should be closed (1-based). + * Example: chr1 24782 24800 . 11.60 - + +transcription factor + * The name of the transcription factor of interest supplied by the user, e.g. CTCF. + +cleavage/transposition bias + * The cleavage/transposition bias of the different protocols, for all 6-mers. + * Provided `options`_: ATAC, DNase double hit or DNase single hit protocols. + + .. _options: https://ohlerlab.mdc-berlin.de/software/Reproducible_footprinting_139/ + +coordinates of ChIP-seq peaks + * A file with the coordinates of the ChIP-seq peaks for the transcription factor of interest. + * The format is flexible as long as the first 3 columns (chromosome, start coordinate, end coordinate) are present. + * Example: chr1 237622 237882 + +number of components + * Total number of footprint and background components that should be learned from the data. + * Options are 2 (1 fp and 1 bg) and 3 (2 fp and 1 bg) components. + +background components + * The mode of initialization for the background component. Options are "Flat" or "Seq". + * Choosing "Flat" initializes this component as a uniform distribution. + * Choosing "Seq" initializes it as the signal profile that would be expected solely due to the protocol bias (given by the cleavage/transposition bias file). + +fixed background component + * Whether the background component should be kept fixed. + * Options are TRUE or FALSE. + * Setting "TRUE" keeps this component fixed, whereas setting "FALSE" lets it be reestimated during training. + +----- + +.. class:: infomark + +**Outputs** + +results + * The results of the footprinting analysis. + * The first 6 columns harbor the motif information (identical to the 'coordinates of motif'). + * The 7th column has the footprint score (log-odds of footprint versus background) for each motif instance. + * The following columns show the probabilities for the individual footprint and background components. + +parameters + * Gives the trained parameters for the footprint and background components. + * It includes as many lines as components (eg the first line has the parameters for the first component). + +plot 1 + * A plot with two panels, showing the initial components above and the final trained components below. + * The plotted values for the final components are given in the 'parameters' output file explained above. + +plot 2 + * A plot only with the final trained components. + * In a model where 2 components are used, this plot is identical to the bottom panel in plot1. + * When 3 components are used, this plot shows the weighted average of the 2 footprint components as the final footprint profile. + +]]></help> + <citations> + <citation type="bibtex">@ARTICLE{footprint, + author = {Aslihan Karabacak and Uwe Ohler}, + title = {To submit}, + journal = {}, + year = {}, + volume = {}, + pages = {} + }</citation> + </citations> +</tool>