comparison junction_saturation.xml @ 3:71ed55a3515a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rseqc commit 37fb1988971807c6a072e1afd98eeea02329ee83
author iuc
date Tue, 14 Mar 2017 10:22:57 -0400
parents f92b87abef3d
children 017eaaf58e5e
comparison
equal deleted inserted replaced
2:ebadf9ee2d08 3:71ed55a3515a
1 <tool id="junction_saturation" name="Junction Saturation"> 1 <tool id="rseqc_junction_saturation" name="Junction Saturation" version="@WRAPPER_VERSION@">
2 <description>detects splice junctions from each subset and compares them to reference gene model</description> 2 <description>detects splice junctions from each subset and compares them to reference gene model</description>
3 <requirements>
4 <requirement type="package" version="2.15.1">R</requirement>
5 <requirement type="package" version="2.3.7">rseqc</requirement>
6 </requirements>
7 <command interpreter="python"> junction_saturation.py -i $input -o output -r $refgene -m $intronSize -v $minSplice
8 3
9 #if $percentiles.specifyPercentiles 4 <macros>
10 -l $percentiles.lowBound -u $percentiles.upBound -s $percentiles.percentileStep 5 <import>rseqc_macros.xml</import>
11 #end if 6 </macros>
12 7
13 </command> 8 <expand macro="requirements" />
14 <inputs>
15 <param name="input" type="data" format="bam,sam" label="input bam/sam file" />
16 <param name="refgene" type="data" format="bed" label="reference gene model" />
17 <param name="intronSize" type="integer" label="Minimum intron size (bp, default=50)" value="50"/>
18 <param name="minSplice" type="integer" label="Minimum coverage (default=1)" value="1" />
19 <conditional name="percentiles">
20 <param name="specifyPercentiles" type="boolean" label="Specify sampling bounds and frequency" value="false"/>
21 <when value="true">
22 <param name="lowBound" type="integer" value="5" label="Lower Bound Sampling Frequency (bp, default=5)" />
23 <param name="upBound" type="integer" value="100" label="Upper Bound Sampling Frequency (bp, default=100)" />
24 <param name="percentileStep" type="integer" value="5" label="Sampling increment (default=5)" />
25 </when>
26 </conditional>
27 </inputs>
28 <outputs>
29 <data format="r" name="outputr" from_work_dir="output.junctionSaturation_plot.r"/>
30 <data format="pdf" name="outputpdf" from_work_dir="output.junctionSaturation_plot.pdf"/>
31 </outputs>
32 <tests>
33 <test>
34 <param name="input" value="Pairend_nonStrandSpecific_36mer_Human_hg19.bam" />
35 <param name="refgene" value="hg19_RefSeq.bed" />
36 <output name="outputr" file="junsatout.junctionSaturation_plot.r" />
37 <output name="outputpdf" file="junsatout.junctionSaturation_plot.pdf" />
38 </test>
39 </tests>
40 <help>
41 .. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
42 9
43 ----- 10 <expand macro="stdio" />
44 11
45 About RSeQC 12 <version_command><![CDATA[junction_saturation.py --version]]></version_command>
46 +++++++++++
47 13
48 The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation. 14 <command><![CDATA[
15 junction_saturation.py
16 --input-file '${input}'
17 --refgene '${refgene}'
18 --out-prefix output
19 --min-intron ${min_intron}
20 --min-coverage ${min_coverage}
21 --mapq ${mapq}
22 #if str($percentiles_type.percentiles_type_selector) == "specify":
23 --percentile-floor ${percentiles_type.lowBound}
24 --percentile-ceiling ${percentiles_type.upBound}
25 --percentile-step ${percentiles_type.percentileStep}
26 #end if
27 ]]>
28 </command>
49 29
50 The RSeQC package is licensed under the GNU GPL v3 license. 30 <inputs>
31 <expand macro="bam_sam_param" />
32 <expand macro="refgene_param" />
33 <expand macro="min_intron_param" />
34 <param name="min_coverage" type="integer" label="Minimum number of supporting reads to call a junction (default=1)" value="1" help="(--min-coverage)" />
35 <expand macro="mapq_param" />
36 <conditional name="percentiles_type">
37 <param name="percentiles_type_selector" type="select" label="Sampling bounds and frequency">
38 <option value="default" selected="true">Default sampling bounds and frequency</option>
39 <option value="specify">Specify sampling bounds and frequency</option>
40 </param>
41 <when value="specify">
42 <param name="lowBound" type="integer" value="5" label="Lower Bound Sampling Frequency (bp, default=5)" help="(--percentile-floor)">
43 <validator type="in_range" min="0" max="100" />
44 </param>
45 <param name="upBound" type="integer" value="100" label="Upper Bound Sampling Frequency (bp, default=100)" help="(--percentile-ceiling)">
46 <validator type="in_range" min="0" max="100" />
47 </param>
48 <param name="percentileStep" type="integer" value="5" label="Sampling increment (default=5)" help="(--percentile-step)">
49 <validator type="in_range" min="0" max="100" />
50 </param>
51 </when>
52 <when value="default"/>
53 </conditional>
54 <expand macro="rscript_output_param" />
55 </inputs>
56
57 <outputs>
58 <expand macro="pdf_output_data" filename="output.junctionSaturation_plot.pdf" />
59 <expand macro="rscript_output_data" filename="output.junctionSaturation_plot.r" />
60 </outputs>
61
62 <tests>
63 <test>
64 <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam" />
65 <param name="refgene" value="hg19_RefSeq_chr1_1-100000.bed" />
66 <param name="rscript_output" value="true" />
67 <output name="outputr" file="output.junctionSaturation_plot.r" compare="sim_size">
68 <assert_contents>
69 <has_line line="pdf('output.junctionSaturation_plot.pdf')" />
70 <has_line line="x=c(5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100)" />
71 </assert_contents>
72 </output>
73 <output name="outputpdf" file="output.junctionSaturation_plot.pdf" compare="sim_size" />
74 </test>
75 </tests>
76
77 <help><![CDATA[
78 junction_saturation.py
79 ++++++++++++++++++++++
80
81 It's very important to check if the current sequencing depth is deep enough to perform
82 alternative splicing analyses. For a well annotated organism, the number of expressed genes
83 in a particular tissue is almost fixed, so the number of splice junctions is also fixed. The fixed
84 splice junctions can be predetermined from the reference gene model. All (annotated) splice
85 junctions should be rediscovered from saturated RNA-seq data; otherwise, downstream
86 alternative splicing analysis is problematic because low abundance splice junctions are
87 missing. This module checks for saturation by resampling 5%, 10%, 15%, ..., 95% of total
88 alignments from the BAM or SAM file, and then detects splice junctions from each subset and
89 compares them to the reference gene model.
51 90
52 Inputs 91 Inputs
53 ++++++++++++++ 92 ++++++++++++++
54 93
55 Input BAM/SAM file 94 Input BAM/SAM file
56 Alignment file in BAM/SAM format. 95 Alignment file in BAM/SAM format.
57 96
58 Reference gene model 97 Reference gene model
59 Gene model in BED format. 98 Gene model in BED format.
60 99
61 Sampling Percentiles - Upper Bound, Lower Bound, Sampling Increment (defaults= 100, 5, and 5) 100 Sampling Percentiles - Upper Bound, Lower Bound, Sampling Increment (defaults= 100, 5, and 5)
62 Sampling starts from the Lower Bound and increments to the Upper Bound at the rate of the Sampling Increment. 101 Sampling starts from the Lower Bound and increments to the Upper Bound at the rate of the Sampling Increment.
63 102
64 Minimum intron length (default=50) 103 Minimum intron length (default=50)
65 Minimum intron length (bp). 104 Minimum intron length (bp).
66 105
67 Minimum coverage (default=1) 106 Minimum coverage (default=1)
68 Minimum number of supporting reads to call a junction. 107 Minimum number of supporting reads to call a junction.
69 108
70 Output 109 Output
71 ++++++++++++++ 110 ++++++++++++++
72 111
73 1. output.junctionSaturation_plot.r: R script to generate plot 112 1. output.junctionSaturation_plot.r: R script to generate plot
74 2. output.junctionSaturation_plot.pdf 113 2. output.junctionSaturation_plot.pdf
75 114
76 .. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/junction_saturation.png 115 .. image:: $PATH_TO_IMAGES/junction_saturation.png
116 :height: 600 px
117 :width: 600 px
118 :scale: 80 %
77 119
78 In this example, the current sequencing depth is almost saturated for "known junction" (red line) detection because the number of "known junctions" reaches a plateau. In other words, nearly all "known junctions" (expressed in this particular tissue) have already been detected, and continued sequencing will not detect additional "known junctions" and will only increase junction coverage (i.e. junctions covered by more reads). In contrast, the current sequencing depth is not saturated for novel junctions (green). 120 In this example, the current sequencing depth is almost saturated for "known junction" (red line) detection because the number of "known junctions" reaches a plateau. In other words, nearly all "known junctions" (expressed in this particular tissue) have already been detected, and continued sequencing will not detect additional "known junctions" and will only increase junction coverage (i.e. junctions covered by more reads). In contrast, the current sequencing depth is not saturated for novel junctions (green).
79 121
122 @ABOUT@
80 123
81 </help> 124 ]]>
125 </help>
126
127 <expand macro="citations" />
128
82 </tool> 129 </tool>