comparison normalize.xml @ 0:83dfe38f6a09 draft

Uploaded
author nikos
date Tue, 04 Nov 2014 14:28:45 -0500
parents
children 2ae336f19de0
comparison
equal deleted inserted replaced
-1:000000000000 0:83dfe38f6a09
1 <tool id="rna_probing_normalize" version="0.1" name="Normalize" force_history_refresh="True">
2 <description>RNA probing data </description>
<!-- Dependencies declared for this tool: the R_3_1_1 package (version 3.1.1),
     RNAprobR (listed both as an R-module and as a package, version 1.0.0), and the
     RNA_PROBING_SCRIPT_PATH environment setting.
     NOTE(review): RNA_PROBING_SCRIPT_PATH presumably points at the directory holding
     normalize.R; confirm against the tool dependency definitions. -->
3 <requirements>
4 <requirement type="package" version="3.1.1">R_3_1_1</requirement>
5 <requirement type="R-module">RNAprobR</requirement>
6 <requirement type="package" version="1.0.0">RNAprobR</requirement>
7 <requirement type="set_environment">RNA_PROBING_SCRIPT_PATH</requirement>
8 </requirements>
9
<!-- Command template (Cheetah directives) run through the Rscript interpreter.
     It calls normalize.R with the treated (-t) and control (-c) datasets, the selected
     EUC method, and then appends optional flag groups only when the matching input is
     set to 'yes' (raw values, deltaTCR, smooth log2 ratio, sliding window winsorization,
     BedGraph export). Results are written to the literal directory 'output_dir' and
     standard output is discarded.
     NOTE(review): the control, fasta and k2n_control inputs are declared optional, yet
     their values are always passed here; confirm normalize.R copes with an unset value. -->
10 <command interpreter="Rscript">
11 normalize.R
12
13 -t $treated
14 -c $control
15
16 --method $euc_method.euc
17
18 #if str($euc_method.euc) == "Fu":
19 --fuComplexity $euc_method.complexity
20 #else if str($euc_method.euc) == "HRF-Seq":
21 --k2nTreated $euc_method.k2n_treated
22 --k2nControl $euc_method.k2n_control
23 #end if
24
25 --reference $fasta
26
27 #if str($compdata) == 'yes':
28 --compdata
29 #end if
30
31 #if str($dtcr.check) == 'yes':
32 --dtcr
33 --dtcrWindow $dtcr.wsize
34 --dtcrToZero $dtcr.zero
35 #end if
36
37 #if str($slograt.check) == 'yes':
38 --slograt
39 --slogratWindow $slograt.wsize
40 --depthCorrection $slograt.depth_cor
41 --pseudocount $slograt.pseudocount
42 #end if
43
44 #if str($swinsor.check) == 'yes':
45 --swinsor
46 --swinsorWindow $swinsor.wsize
47 --winsorLevel $swinsor.winsor_level
48 --fixQuantile $swinsor.only_top
49 #end if
50
51 --ntOffset $nt_offset
52 --cutoff $cutoff
53
54 #if str($bedgraph.check) == 'yes':
55 --bedgraph
56 --bed "$bedgraph.bed_file"
57 --genome "$bedgraph.genome"
58 --trackName "$bedgraph.track_name"
59 #end if
60
61 -o 'output_dir' > /dev/null
62
63 </command>
64
<!-- User-facing parameters of the tool. Every name declared here is referenced as a
     $variable by the command template, so names, types and option values must stay in
     sync with it. Only labels and help strings are corrected in this revision. -->
65 <inputs>
<!-- Count tables: the treated sample is mandatory, the control sample is optional. -->
66 <param name="treated" type="data" format="tabular" label="Unique Barcodes/Counts Treated File" help="'Summarize Unique Barcodes' tool output." />
67 <param name="control" type="data" format="tabular" optional="True" label="Unique Barcodes/Counts Control File" help="'Summarize Unique Barcodes' tool output." />
<!-- EUC method selector. "counts" needs nothing extra, "Fu" asks for the barcode
     complexity, "HRF-Seq" asks for the k2n files.
     NOTE(review): the k2n inputs use format="text"; the usual plain-text datatype
     extension is "txt". Confirm against the datatype produced by the upstream tool. -->
68 <conditional name="euc_method">
69 <param name="euc" type="select" label="EUC* method" help="*Estimated unique counts.">
70 <option value="counts" selected="True">Counts - Keep unique counts</option>
71 <option value="Fu">Fu - Fu GK et al. PNAS 2011 (Binomial Distribution) formula</option>
72 <option value="HRF-Seq">HRF-Seq - as described in Kielpinski and Vinther, NAR 2014</option>
73 </param>
74 <when value="counts" />
75 <when value="Fu">
76 <param name="complexity" type="integer" value="" min="1" label="Random barcode complexity (m)" help="Highest possible number of unique barcodes. E.g. for 7-nucleotide, fully degenerate random barcodes (NNNNNNN), m = 16384 (m = 4^7)." />
77 </when>
78 <when value="HRF-Seq">
79 <param name="k2n_treated" type="data" format="text" label="k2n Treated File" help="'Summarize Unique Barcodes' output." />
80 <param name="k2n_control" type="data" format="text" label="k2n Control File" optional="True" help="'Summarize Unique Barcodes' output." />
81 </when>
82 </conditional>
<!-- Optional reference sequence and the switch for printing raw values
     (the boolean yields 'yes' when checked and an empty string otherwise). -->
83 <param name="fasta" type="data" format="fasta" optional="True" label="Reference Fasta sequence" help="Used to report nucleotide at each position." />
84 <param name="compdata" type="boolean" label="Print raw values" checked="True" truevalue="yes" falsevalue="" help="Get Priming Count, Termination Count, Coverage and TCR values" />
<!-- deltaTCR normalization: off by default; when applied it exposes the window size
     and the option to replace negative values with zero. -->
85 <conditional name="dtcr">
86 <param name="check" type="select" label="deltaTCR Normalization" help="Requires Control dataset!">
87 <option value="no" selected="True">Do not apply</option>
88 <option value="yes">Apply</option>
89 </param>
90 <when value="no"/>
91 <when value="yes">
92 <param name="wsize" label="Window Size" type="integer" value="3" min="1" size="3" help="Integer. Use only odd numbers to ensure that windows are centred on a nucleotide of interest." />
93 <param name="zero" label="Bring to zero" type="boolean" checked="True" truevalue="TRUE" falsevalue="FALSE" help="Replace negative deltaTCR values with 0." />
94 </when>
95 </conditional>
<!-- Smooth log2 ratio normalization: off by default; when applied it exposes the
     window size, the depth correction mode and the pseudocount. -->
96 <conditional name="slograt">
97 <param name="check" type="select" label="Smooth Log2 Ratio Normalization" help="Requires Control dataset!">
98 <option value="no" selected="True">Do not apply</option>
99 <option value="yes">Apply</option>
100 </param>
101 <when value="no"/>
102 <when value="yes">
103 <param name="wsize" label="Window Size" type="integer" value="5" min="1" size="3" help="Integer. Use only odd numbers to ensure that windows are centred on a nucleotide of interest." />
104 <param name="depth_cor" type="select" label="Depth Correction">
105 <option value="no">No - counts are used as given</option>
106 <option value="all" selected="True">All - treated counts are multiplied by sum of control counts and divided by sum of treated counts</option>
107 <option value="RNA">RNA - as in "All" but per RNA basis</option>
108 </param>
109 <param name="pseudocount" type="integer" value="5" size="3" label="Pseudocount" help="Pseudocounts to be added to each nucleotide prior to calculating log2 ratio." />
110 </when>
111 </conditional>
<!-- Sliding window winsorization: off by default; when applied it exposes the window
     size, the winsor level (0.0 to 1.0) and the fixed bottom quantile switch. -->
112 <conditional name="swinsor">
113 <param name="check" type="select" label="Sliding window Winsorization" help="Doesn't require Control dataset.">
114 <option value="no" selected="True">Do not apply</option>
115 <option value="yes">Apply</option>
116 </param>
117 <when value="no"/>
118 <when value="yes">
119 <param name="wsize" label="Window Size" type="integer" value="71" min="1" size="3" help="Integer. Use only odd numbers to ensure that windows are centred on a nucleotide of interest." />
120 <param name="winsor_level" type="float" value="0.9" min="0.0" max="1.0" size="3" label="Winsor level" help="Indicates where winsorization boundaries should be made. Value 0.9 indicates that top 5% of data will be reduced to 95% quantile and bottom 5% to 5% quantile (default: 0.9)" />
121 <param name="only_top" type="boolean" label="Fix quantile" checked="False" truevalue="TRUE" falsevalue="FALSE" help="If checked, the bottom quantile is fixed to 0." />
122 </when>
123 </conditional>
124
<!-- Settings that are always passed to the script. -->
125 <param name="cutoff" label="Cutoff Length" type="integer" value="1" min="0" size="3" help="Only inserts of this length or longer will be used for processing." />
126 <param name="nt_offset" label="Nucleotide Offset" type="integer" value="1" size="3" help="How many nucleotides before modification does the reverse transcription terminate? e.g. for HRF-Seq offset = 1" />
127
<!-- Optional BedGraph export: needs a BED file of transcripts, a genome build and a
     track name. The outputs section filters its BedGraph datasets on this selector. -->
128 <conditional name="bedgraph">
129 <param name="check" type="select" label="Produce BedGraph output" help="Can be displayed directly on UCSC browser. One file per normalisation method." >
130 <option value="no" selected="True">No</option>
131 <option value="yes">Yes</option>
132 </param>
133 <when value="yes">
134 <param name="bed_file" type="data" format="bed" label="Transcripts in BED format" help="12 column BED file containing transcript definitions." />
135 <param name="genome" type="text" label="Genome Build" help="E.g. hg19" />
136 <param name="track_name" type="text" label="Track Name" size="20" value="Track Name" />
137 </when>
138 <when value="no" />
139 </conditional>
140 <!-- <param name="plots" type="boolean" checked="False" truevalue="True" falsevalue="False" label="Produce plots" help="pdf format" /> -->
141 </inputs>
142
<!-- Datasets collected from the 'output_dir' working directory written by the command.
     The tabular table is always produced. Each BedGraph dataset is kept only when
     BedGraph export and the matching normalization are both set to 'yes', and its dbkey
     metadata is copied from the bedgraph.genome input. -->
143 <outputs>
144 <data format="tabular" name="normalized" label="${tool.name} on ${on_string} (tabular)" from_work_dir="output_dir/norm_df.txt" />
<!-- deltaTCR track. -->
145 <data format="bedgraph" name="bedgraph_dtcr" label="${tool.name} on ${on_string}: dTCR (bedGraph)" from_work_dir="output_dir/dtcr.bedgraph">
146 <filter> bedgraph['check'] == 'yes' and dtcr['check'] == 'yes' </filter>
147 <actions>
148 <conditional name="bedgraph.check">
149 <when value="yes">
150 <action type="metadata" name="dbkey">
151 <option type="from_param" name="bedgraph.genome" />
152 </action>
153 </when>
154 </conditional>
155 </actions>
156 </data>
<!-- Smooth log2 ratio track (label spelling corrected from "Smoot Log2ratio"). -->
157 <data format="bedgraph" name="bedgraph_slograt" label="${tool.name} on ${on_string}: Smooth Log2 Ratio (bedGraph)" from_work_dir="output_dir/slograt.bedgraph">
158 <filter> bedgraph['check'] == 'yes' and slograt['check'] == 'yes' </filter>
159 <actions>
160 <conditional name="bedgraph.check">
161 <when value="yes">
162 <action type="metadata" name="dbkey">
163 <option type="from_param" name="bedgraph.genome" />
164 </action>
165 </when>
166 </conditional>
167 </actions>
168 </data>
<!-- Sliding window winsorization track. -->
169 <data format="bedgraph" name="bedgraph_swinsor" label="${tool.name} on ${on_string}: Smooth Winsorisation (bedGraph)" from_work_dir="output_dir/swinsor.bedgraph">
170 <filter> bedgraph['check'] == 'yes' and swinsor['check'] == 'yes' </filter>
171 <actions>
172 <conditional name="bedgraph.check">
173 <when value="yes">
174 <action type="metadata" name="dbkey">
175 <option type="from_param" name="bedgraph.genome" />
176 </action>
177 </when>
178 </conditional>
179 </actions>
180 </data>
181 </outputs>
182
<!-- No functional tests are defined for this tool yet; the element is empty. -->
183 <tests>
184 </tests>
185
186 <help>
187 **What it does**
188
189 The Normalize tool performs termini-based signal detection using summarized counts of sequenced reads (**Read Counts**) or sets of reads carrying the same barcode sequence (**Unique Barcodes**). The number of unique barcodes can be converted to Estimated Unique Counts (EUCs) by correcting for the existence of distinct barcodes of the same sequence with (Fu et al., 2011) or without (Kielpinski and Vinther, 2014) the assumption that all barcodes have the same probability of being captured.
190
191 ------
192
193 **Inputs**
194
195 **Unique Barcodes** or **Read Counts** files produced by the *Summarize Unique Barcodes* tool for treated and control samples. If deltaTCR or Smooth Log2 Ratio normalization is desired, Control files are required.
196
197 **k2n file(s)** is (are) required if "HRF-Seq" is selected as the "EUC" method.
198
199 **Reference sequences** in FASTA format. Use the file that you produced the read alignment with.
200
201 ------
202
203 **EUC method**
204
205 * **Counts** - Preserves the count of unique barcodes or read counts (depending on the input). It is the only suitable method for reading in a “Read Counts” file, and it is applicable when the highest observed unique barcode count is lower than the square root of the count of all possible barcode combinations (Casbon et al., 2011).
206
207 * **Fu** - Allows different barcodes to share the same sequence and assumes equal probability of ligating each barcode sequence (Fu et al., 2011). When selected, the value of the random barcode complexity must be specified (e.g. for the barcode signature NWTRYSNNNN the complexity equals 4×2×1×2×2×2×4×4×4×4 = 16384).
208
209 * **HRF-Seq** - Similar to "Fu" but the probability of ligating different barcodes is estimated via observed frequencies of nucleotides at each barcode position (Kielpinski and Vinther, 2014). Requires k2n files for both treated and control samples.
210
211 ------
212
213 **Normalization methods**
214
215 For a detailed description read (reference to the chapter)
216
217 ------
218
219 **Outputs**
220
221
222 </help>
223
<!-- Literature references attached to the tool, given as DOIs.
     NOTE(review): these presumably correspond to the Kielpinski and Vinther (NAR 2014),
     Fu et al. (PNAS 2011) and Casbon et al. (2011) papers named in the help text;
     confirm the DOI-to-paper mapping. -->
224 <citations>
225 <citation type="doi">10.1093/nar/gku167</citation>
226 <citation type="doi">10.1073/pnas.1017621108</citation>
227 <citation type="doi">10.1093/nar/gkr217</citation>
228 </citations>
229
230 </tool>