changeset 0:28c319539b42 draft default tip

planemo upload for repository https://github.com/tbischler/PEAKachu commit 21413560ba7ac260a92531c08899c4a342d1660d
author bgruening
date Thu, 15 Feb 2018 11:09:43 -0500
parents
children
files peakachu.xml test-data/test1_+xl.bam test-data/test1_-xl.bam test-data/test1_MA.png test-data/test1_peaks.tsv
diffstat 5 files changed, 198 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/peakachu.xml	Thu Feb 15 11:09:43 2018 -0500
@@ -0,0 +1,195 @@
+<tool id="peakachu" name="PEAKachu" version="0.1.0.0">
+    <description>Calls Peaks in CLIP data</description>
+    <requirements>
+        <requirement type="package" version="3.6">python</requirement>
+        <requirement type="package" version="0.1.0">peakachu</requirement>
+    </requirements>
+    <version_command>
+    <![CDATA[
+        peakachu --version
+    ]]>
+    </version_command>
+    <command detect_errors="aggressive">
+    <![CDATA[
+        #for $i, $clib in enumerate($controlLibs):
+            #if $clib:
+                ln -s -f '$clib' ${i}.c.bam &&
+                ln -s -f '$clib.metadata.bam_index' ${i}.c.bam.bai &&
+            #end if
+        #end for
+        #for $j, $elib in enumerate($experimentLibs):
+            ln -s -f '$elib' ${j}.e.bam &&
+            ln -s -f '$elib.metadata.bam_index' ${j}.e.bam.bai &&
+        #end for
+        ## Inputs are now symlinked as <n>.e.bam / <n>.c.bam, each with its .bai index alongside.
+        pwd &&
+        mkdir ./tmp_output && 
+        peakachu 
+        ${mode.mode_selector}
+        --exp_libs 
+        #for $i, $elib in enumerate($experimentLibs):
+            '${i}.e.bam'
+        #end for
+        #for $i, $clib in enumerate($controlLibs):
+            #if $clib and $i == 0:
+                --ctr_libs 
+            #end if
+            #if $clib:
+                '${i}.c.bam'
+            #end if
+        #end for
+        $pairwise_replicates
+        $paired_end
+        --max_insert_size $max_insert_size
+        --features '$features'
+        --sub_features '$sub_features'
+        --max_proc "\${GALAXY_SLOTS:-1}"
+        --output_folder ./tmp_output
+        #if str($mode.mode_selector) == 'adaptive':
+            --min_cluster_expr_frac $mode.min_cluster_expr_frac
+            --min_block_overlap $mode.min_block_overlap
+            --min_max_block_expr $mode.min_max_block_expr
+        #elif str($mode.mode_selector) == 'window':
+            --window_size $mode.window_size
+            --step_size $mode.step_size
+            --stat_test $mode.stat_test
+            --het_p_val_threshold $mode.het_p_val_threshold
+            --rep_pair_p_val_threshold $mode.rep_pair_p_val_threshold
+        #end if
+       --norm_method $mode.norm_method.norm_method_selector
+        #if str($mode.norm_method.norm_method_selector) == 'manual':
+            --size_factors $mode.norm_method.size_factors
+        #end if
+        --mad_multiplier $mad_multiplier
+        --fc_cutoff $fc_cutoff
+        --padj_threshold $padj_threshold
+        ## Below: write the CSV header once, append the data rows of every peak table, then publish both outputs.
+        &&
+        head -n 1 -q ./tmp_output/peak_tables/*.csv | head -n 1 > peaks.tsv &&
+        tail -n +2 -q ./tmp_output/peak_tables/*.csv >> peaks.tsv &&
+        mv peaks.tsv '$peak_tables' &&
+        mv ./tmp_output/plots/Initial*.png '$MA_plot'
+
+    ]]>
+    </command>
+    <inputs>
+        <param name="experimentLibs" type="data" format="bam" label="Experiment Libraries" multiple="True"/>
+        <param name="controlLibs" type="data" format="bam" label="Control Libraries" multiple="True" optional="True"/>
+        <param argument="--pairwise_replicates" type="boolean" truevalue="--pairwise_replicates" falsevalue="" checked="False" label="Pairwise Replicates" /> 
+        <param argument="--paired_end" type="boolean" truevalue="--paired_end" falsevalue="" checked="False" label="Paired End" /> 
+        <param argument="--max_insert_size" type="integer" value="50" label="Maximum Insert Size"/>
+        <!-- The gff feature is not implemented, because the function can easily be accomplished with featureCount or other tools
+        <param name="gffs" type="data" format="gff" label="Annotation" optional="True" multiple="True"/-->
+        <param argument="--features" type="text" label="Features">
+            <sanitizer>
+                <valid initial="default"/>
+            </sanitizer>
+        </param>
+        <param argument="--sub_features" type="text" label="Sub-Features">
+            <sanitizer>
+                <valid initial="default"/>
+            </sanitizer>
+        </param>
+
+        <conditional name="mode">
+            <param name="mode_selector" type="select" label="Select Mode" help="These modes work differently.">
+                <option value="adaptive" selected="True">Adaptive</option>
+                <option value="window">Window</option>
+                <!-- The following options are not implemented because they are vastly different and should be implemented as their own tool, if need be.
+                <option value="coverage">Coverage</option>
+                <option value="consensus_peak">Consensus Peak</option-->
+            </param>
+            <when value="adaptive">
+                <param argument="--min_cluster_expr_frac" label="Minimum cluster Expression Fraction" help="Minimum fraction of a block in a cluster for further consideration." type="float" value="0.01"/>
+                <param argument="--min_block_overlap" label="Minimum Block Overlap" help="Minimum fraction of the width of blocks for merging." type="float" value="0.5"/>
+                <param argument="--min_max_block_expr" label="Minimum Block Expression" help="Minimum fraction of expression of blocks for merging." type="float" value="0.1"/>
+                <conditional name="norm_method">
+                    <param name="norm_method_selector" type="select" label="Normalisation Method.">
+                        <option value="deseq" selected="True">DESeq2</option>
+                        <option value="manual">Manual</option>
+                        <option value="none">None</option>
+                    </param>
+                    <when value="deseq"/>
+                    <when value="none"/>
+                    <when value="manual">
+                        <param argument="--size_factors" label="Size Factors" type="text" help="Size factors have to be separated by SPACE">
+                            <sanitizer>
+                                <valid initial="default"/>
+                            </sanitizer>
+                        </param>
+                    </when>
+                </conditional>
+            </when>
+            <when value="window">
+                <param argument="--window_size" label="Window Size" type="integer" value="25"/>
+                <param argument="--step_size" label="Step Size" type="integer" value="5"/>
+                <param name="stat_test" type="select" label="Statistical Test">
+                    <option value="gtest" selected="True">gtest</option>
+                    <option value="deseq">DESeq2</option>
+                </param>
+                <conditional name="norm_method">
+                    <param name="norm_method_selector" type="select" label="Normalisation Method.">
+                        <option value="tmm" selected="True">TMM</option>
+                        <option value="deseq">DESeq2</option>
+                        <option value="count">Count</option>
+                        <option value="manual">Manual</option>
+                        <option value="none">None</option>
+                    </param>
+                    <when value="deseq"/>
+                    <when value="tmm"/>
+                    <when value="count"/>
+                    <when value="none"/>
+                    <when value="manual">
+                        <param argument="--size_factors" label="Size Factors" type="text" help="Size factors have to be separated by SPACE">
+                            <sanitizer>
+                                <valid initial="default"/>
+                            </sanitizer>
+                        </param>
+                    </when>
+                </conditional>
+                <param argument="--het_p_val_threshold" label="Heterogeneous p-value Threshold" type="float" value="0.01"/>
+                <param argument="--rep_pair_p_val_threshold" label="Paired p-value Threshold" type="float" value="0.05"/>
+            </when>
+        </conditional>
+        <param argument="--mad_multiplier" label="Mad Multiplier" type="float" value="2.0"/>
+        <param argument="--fc_cutoff" label="Fold Change Threshold" type="float" value="2.0"/>
+        <param argument="--padj_threshold" type="float" label="Adjusted p-value Threshold" value="0.05"/>  
+    </inputs>
+    <outputs>
+        <data format="tabular" name="peak_tables" label="${tool.name} ${mode.mode_selector} on ${on_string}: peaks"/>
+        <data format="png" name="MA_plot" label="${tool.name} ${mode.mode_selector} on ${on_string}: MA plot"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="experimentLibs" value="test1_+xl.bam"/>
+            <param name="controlLibs" value="test1_-xl.bam"/>
+            <output name="peak_tables" ftype="tabular" file="test1_peaks.tsv"/>
+            <output name="MA_plot" ftype="png" file="test1_MA.png"/>
+        </test>
+    </tests>
+    <help>
+    <![CDATA[
+**PEAKachu**
+
+PEAKachu is a tool for the accurate mapping of RBP binding sites based on CLIP-seq and RIP-seq data.
+PEAKachu uses signal and control libraries (ideally more than three each) to detect binding sites.
+It implements two peak calling approaches:
+
+**adaptive**
+
+The adaptive approach applies a three-step procedure to detect regions that are significantly enriched over controls.
+
+- blockbuster is applied to the pooled libraries to combine similar sets of reads into blocks
+- blocks are decomposed into peaks by iteratively applying a block merging heuristic
+- peaks with significant enrichment of signal over control libraries are determined using DESeq2
+
+**windowed**
+
+The windowed approach subdivides the genome into overlapping regions.
+After filtering of lowly expressed regions and library normalization (either using manual size factors, TMM, or DESeq2), this approach determines significantly enriched windows using either DESeq2 or repeated G-tests of goodness-of-fit.
+
+    ]]>
+    </help>
+    <citations>
+    </citations>
+</tool>
Binary file test-data/test1_+xl.bam has changed
Binary file test-data/test1_-xl.bam has changed
Binary file test-data/test1_MA.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1_peaks.tsv	Thu Feb 15 11:09:43 2018 -0500
@@ -0,0 +1,3 @@
+replicon	peak_id	peak_start	peak_end	peak_strand	0.e	0.c	base_means	fold_change	feature_type	feature_start	feature_end	feature_strand	feature_locus_tag	feature_name	subfeature_type	feature_product	overlap_length
+NC_016810.1	1	514559	514582	-	72.56031973468696	18.605210188381267	45.58276496153411	3.900000000000001	intergenic	NA	NA	NA	NA	NA	NA	NA	NA
+NC_016810.1	2	514559	514584	+	70.94786818502725	18.605210188381267	44.776539186704255	3.813333333333334	intergenic	NA	NA	NA	NA	NA	NA	NA	NA