diff ribotaper_part3_main.xml @ 0:08c9da230938 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/ribotaper/ commit a3232e388d52097083f2662ccb26351fdc2f2412
author bgruening
date Tue, 07 Jun 2016 15:47:26 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ribotaper_part3_main.xml	Tue Jun 07 15:47:26 2016 -0400
@@ -0,0 +1,122 @@
+<tool id="ribotaper_ribosome_profiling" name="ribotaper part 3: ribosome profiling" version="0.1.0">
+    <requirements>
+            <requirement type="package" version="1.3.1">ribotaper</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+
+    <command><![CDATA[
+        tar
+            "xzvf"
+            "$annotation_path"
+
+        &&
+
+        Ribotaper.sh
+            "$ribo_bam"
+            "$rna_bam"
+            "annotation_path"
+            "$read_lenghts_ribo1,$read_lenghts_ribo2,$read_lenghts_ribo3"
+            "$cutoff1,$cutoff2,$cutoff3"
+            "\${GALAXY_SLOTS:-12}"
+
+    ]]></command>
+    <inputs>
+        <param name="annotation_path" type="data" format="compressed_archive" label="annotation_path" help="Please run 'ribotaper part 1' to generate the archive."/>
+        <param name="ribo_bam" type="data" format="BAM" label="ribo_bam" help="Ribo-seq alignment file in BAM format."/>
+        <param name="rna_bam" type="data" format="BAM" label="rna_bam" help="RNA-seq alignment file in BAM format."/>
+        <param name="read_lenghts_ribo1" type="text" value="26"  label="Read length 1" help="Read length 1, which is used for P-site calculation. Default is '26' but it varies a lot in different datasets. Please run 'ribotaper part 2' to deterimine a appropriate value."/>
+        <param name="read_lenghts_ribo2" type="text" value="28"  label="Read length 2" help="Read length 2, which is used for P-site calculation. Default is '28' but it varies a lot in different datasets.  Please run 'ribotaper part 2' to deterimine a appropriate value"/>
+        <param name="read_lenghts_ribo3" type="text" value="29"  label="Read length 3" help="Read length 3, which is used for P-site calculation. Default is '29' but it varies a lot in different datasets.  Please run 'ribotaper part 2' to deterimine a appropriate value"/>
+        <param name="cutoff1" type="text" value="9"  label="Cutoff 1" help="Offset 1, which is used for P-sites calculation. Default is '9' but it varies a lot in different datasets.
+        Please run 'ribotaper part 2' to deterimine a appropriate value."/>
+        <param name="cutoff2" type="text" value="12"  label="Cutoff 2" help="Offset 2, which is used for P-sites calculation. Default is '12' but it varies a lot in different datasets.
+         Please run 'ribotaper part 2' to deterimine a appropriate value."/>
+        <param name="cutoff3" type="text" value="12"  label="Cutoff 3" help="Offset 3, which is used for P-sites calculation. Default is '12' but it varies a lot in different datasets.
+         Please run 'ribotaper part 2' to deterimine a appropriate value."/>
+    </inputs>
+    <outputs>
+        <data name="output1" type="data" format="pdf" from_work_dir="quality_check_plots.pdf" label="QC plots"/>
+        <data name="output2" type="data" format="tabular" from_work_dir="ORFs_genes_found" label="Summary of translated ORFs"/>
+        <data name="output3" type="data" format="tabular" from_work_dir="ORFs_max" label="Translated ORFs (max)"/>
+        <data name="output4" type="data" format="tabular" from_work_dir="ORFs_max_filt" label="Translated ORFs (max_filt)"/>
+        <data name="output5" type="data" format="bed" from_work_dir="translated_ORFs_sorted.bed" label="Translated ORFs (sorted)"/>
+        <data name="output6" type="data" format="bed" from_work_dir="translated_ORFs_filtered_sorted.bed" label="Translated ORFs (filtered/sorted)"/>
+        <data name="output7" type="data" format="fasta" from_work_dir="protein_db_max.fasta" label="Protein DB"/>
+        <data name="output8" type="data" format="pdf" from_work_dir="Final_ORF_results.pdf" label="ORF categories (length/coverage)"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="annotation_path" value="annotation_path.tgz" ftype="compressed_archive"/>
+            <param name="ribo_bam" value="test_ribo.bam"/>
+            <param name="rna_bam" value="test_rna.bam"/>
+            <param name="read_lenghts_ribo1" value="26"/>
+            <param name="read_lenghts_ribo2" value="28"/>
+            <param name="read_lenghts_ribo3" value="29"/>
+            <param name="cutoff1" value="9"/>
+            <param name="cutoff2" value="12"/>
+            <param name="cutoff3" value="12"/>
+            <output name="output2" file="ORFs_genes_found"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+RiboTaper is an analysis pipeline for Ribosome Profiling
+(Ribo-seq) experiments,
+which exploits the triplet periodicity of
+ribosomal footprints to call translated regions.
+See
+https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/ for details.
+
+
+The Ribotaper Galaxy tool set consists of three tools:
+
+  - ``ribotaper part 1``: creation of annotation files
+  - ``ribotaper part 2``: metagene analysis for P-sites definition
+  - ``ribotaper part 3``: ribosome profiling
+
+The order of execution should follow:
+``ribotaper part 1, part 2 and part 3``.
+
+The current tool is ``ribotaper part 3``,
+ribosome profiling.
+
+Outputs
+--------
+
+**QC plots**:
+This plot provides the user statistics about the Ribo-seq and RNA seq data used, together with the assessment of the P-sites calculations.
+Important values are the pie chart showing the agreement between the frame (defined by the P-sites position) and the annotated frame. Reliable P-sites calculations produce an agreement above 90%.
+Very important are also the length/coverage statistics for the Ribo-seq (bottom right):
+This shows how the P-site calculations can be used to detect active translation in regions of different length and coverage, in a way the user can estimate the precision of the Ribo-seq data, and understand the level or resolution the data allows.
+
+**Summary of translated ORFs**:
+Tab-separated values for the number of ORFs found and their corresponding genes, for the different ORF categories.
+
+**Translated ORFs (max, max_filt)**:
+Tab-separated file containing information about detected ORFs.
+Translated ORFs (max_filt) contains ORFs filtered for excessive multimapping and ORFs in non-coding genes overlapping known coding regions (recommended for further analysis).
+
+**Protein DB**:
+Fasta file of the detected ORFs peptide sequence, suitable as an alternative protein database (not filtered for multimapping)
+
+**Translated ORFs  (sorted, filtered/sorted)**:
+BED files with genomic coordinates for the detected ORFs. The total number of P-sites along the ORF is reported on the 5th column.
+
+**ORF categories (length/coverage)**:
+PDF file containing info about the number of ORFs found, together with their length and coverage per category/annotation.
+
+Important notes
+----------------
+
+  - We ran the RiboTaper analysis on an SGE cluster, using 7 cores and h_vmem 8G. For each dataset, the complete RiboTaper workflow (from the bam files to final results) took ~ 1 day.
+
+  - The current RiboTaper framework is not designed to identify and quantify ORFs on different transcripts. This means the transcript annotation is crucial.
+
+  - Be careful about using scaffolds, both in the genome and GTF files, which may slow the whole pipeline.
+
+]]></help>
+    <citations>
+        <citation type="doi">10.1038/nmeth.3688</citation>
+    </citations>
+</tool>