annotate summarize_unique_barcodes.xml @ 22:5ee5afb56ca4 draft

Fixed a bug that was causing the HRF workflow to crash when an empty barcode sequence was given.
author nikos
date Tue, 04 Aug 2015 04:32:12 -0400
parents f64937805d0d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
17
31f25b37187b Uploaded
nikos
parents:
diff changeset
1 <tool id="rna_probing_summarize" version="1.0.0" name="Summarize Unique Barcodes" force_history_refresh="True">
31f25b37187b Uploaded
nikos
parents:
diff changeset
2 <description></description>
31f25b37187b Uploaded
nikos
parents:
diff changeset
3
31f25b37187b Uploaded
nikos
parents:
diff changeset
4 <requirements>
31f25b37187b Uploaded
nikos
parents:
diff changeset
5 <requirement type="package" version="4.1.0">gnu_awk</requirement>
31f25b37187b Uploaded
nikos
parents:
diff changeset
6 <requirement type="package" version="0.1.19">samtools</requirement>
31f25b37187b Uploaded
nikos
parents:
diff changeset
7 <requirement type="package" version="3.1.1">R_3_1_1</requirement>
31f25b37187b Uploaded
nikos
parents:
diff changeset
8 <requirement type="R-module">RNAprobR</requirement>
31f25b37187b Uploaded
nikos
parents:
diff changeset
9 <requirement type="package" version="1.0.0">RNAprobR</requirement>
31f25b37187b Uploaded
nikos
parents:
diff changeset
10 <requirement type="set_environment">RNA_PROBING_SCRIPT_PATH</requirement>
31f25b37187b Uploaded
nikos
parents:
diff changeset
11 </requirements>
31f25b37187b Uploaded
nikos
parents:
diff changeset
12
31f25b37187b Uploaded
nikos
parents:
diff changeset
13 <command interpreter="bash">
31f25b37187b Uploaded
nikos
parents:
diff changeset
14 summarize_unique_barcodes.sh
31f25b37187b Uploaded
nikos
parents:
diff changeset
15
31f25b37187b Uploaded
nikos
parents:
diff changeset
16 ## Inputs
31f25b37187b Uploaded
nikos
parents:
diff changeset
17 -f $input1 -b $input2
31f25b37187b Uploaded
nikos
parents:
diff changeset
18
31f25b37187b Uploaded
nikos
parents:
diff changeset
19 ##
31f25b37187b Uploaded
nikos
parents:
diff changeset
20
31f25b37187b Uploaded
nikos
parents:
diff changeset
21 #if str( $k2n ) == 'True':
31f25b37187b Uploaded
nikos
parents:
diff changeset
22 -k
31f25b37187b Uploaded
nikos
parents:
diff changeset
23 #end if
31f25b37187b Uploaded
nikos
parents:
diff changeset
24
31f25b37187b Uploaded
nikos
parents:
diff changeset
25 #if str( $priming.flag ) == 'True':
31f25b37187b Uploaded
nikos
parents:
diff changeset
26 -p $priming.position
31f25b37187b Uploaded
nikos
parents:
diff changeset
27 #end if
31f25b37187b Uploaded
nikos
parents:
diff changeset
28
31f25b37187b Uploaded
nikos
parents:
diff changeset
29 #if str( $trimming ) == 'True':
31f25b37187b Uploaded
nikos
parents:
diff changeset
30 -t
31f25b37187b Uploaded
nikos
parents:
diff changeset
31 #end if
31f25b37187b Uploaded
nikos
parents:
diff changeset
32
19
ceb96c265bd1 Trying to set environment r lib path dependencies.
nikos
parents: 18
diff changeset
33 -r \$RNA_PROBING_SCRIPT_PATH
17
31f25b37187b Uploaded
nikos
parents:
diff changeset
34 </command>
31f25b37187b Uploaded
nikos
parents:
diff changeset
35
31f25b37187b Uploaded
nikos
parents:
diff changeset
36 <!-- basic error handling -->
31f25b37187b Uploaded
nikos
parents:
diff changeset
37 <stdio>
31f25b37187b Uploaded
nikos
parents:
diff changeset
38 <regex match="Error" level="fatal" description="" />
31f25b37187b Uploaded
nikos
parents:
diff changeset
39 </stdio>
31f25b37187b Uploaded
nikos
parents:
diff changeset
40
31f25b37187b Uploaded
nikos
parents:
diff changeset
41 <inputs>
31f25b37187b Uploaded
nikos
parents:
diff changeset
42 <param format="bam" name="input1" type="data" label="Aligned Reads" help="BAM format." />
31f25b37187b Uploaded
nikos
parents:
diff changeset
43 <param format="tabular" name="input2" type="data" optional="True" label="Barcodes" help="Produced by Debarcoding tool." />
31f25b37187b Uploaded
nikos
parents:
diff changeset
44 <param name="k2n" type="boolean" checked="False" truevalue="True" falsevalue="False" label="Produce k2n file" help="Check the box if you ran the tool and received a warning message to produce the k2n file. Necessary if you want to use 'HRF-Seq' method in 'Normalize' tool. Warning: Can be very slow!" />
31f25b37187b Uploaded
nikos
parents:
diff changeset
45 <param name="trimming" type="boolean" checked="True" truevalue="True" falsevalue="False" label="Trim untemplated nucleotides" help="" />
31f25b37187b Uploaded
nikos
parents:
diff changeset
46 <conditional name="priming">
31f25b37187b Uploaded
nikos
parents:
diff changeset
47 <param name="flag" type="select" label="Set priming position" help="Set the priming position manually.">
31f25b37187b Uploaded
nikos
parents:
diff changeset
48 <option value="False">No</option>
31f25b37187b Uploaded
nikos
parents:
diff changeset
49 <option value="True">Yes</option>
31f25b37187b Uploaded
nikos
parents:
diff changeset
50 </param>
31f25b37187b Uploaded
nikos
parents:
diff changeset
51 <when value="True">
31f25b37187b Uploaded
nikos
parents:
diff changeset
52 <param name="position" type="integer" value="0" min="0" label="Priming position" />
31f25b37187b Uploaded
nikos
parents:
diff changeset
53 </when>
31f25b37187b Uploaded
nikos
parents:
diff changeset
54 <when value="False" />
31f25b37187b Uploaded
nikos
parents:
diff changeset
55 </conditional>
31f25b37187b Uploaded
nikos
parents:
diff changeset
56 </inputs>
31f25b37187b Uploaded
nikos
parents:
diff changeset
57
31f25b37187b Uploaded
nikos
parents:
diff changeset
58 <outputs>
31f25b37187b Uploaded
nikos
parents:
diff changeset
59 <data format="tabular" name="trimming_stats" label="${tool.name} on ${on_string}: Trimming stats" from_work_dir="output_dir/trimming_stats.txt">
31f25b37187b Uploaded
nikos
parents:
diff changeset
60 <filter>trimming is True</filter>
31f25b37187b Uploaded
nikos
parents:
diff changeset
61 </data>
31f25b37187b Uploaded
nikos
parents:
diff changeset
62 <data format="tabular" name="unique_barcodes" label="${tool.name} on ${on_string}: Unique Barcodes" from_work_dir="output_dir/unique_barcodes.txt">
31f25b37187b Uploaded
nikos
parents:
diff changeset
63 <filter> input2 != None </filter>
31f25b37187b Uploaded
nikos
parents:
diff changeset
64 </data>
31f25b37187b Uploaded
nikos
parents:
diff changeset
65 <data format="tabular" name="read_counts" label="${tool.name} on ${on_string}: Read Counts" from_work_dir="output_dir/read_counts.txt" />
31f25b37187b Uploaded
nikos
parents:
diff changeset
66 <data format="txt" name="k2n_file" label="${tool.name} on ${on_string}: k2n file" from_work_dir="output_dir/k2n.txt">
31f25b37187b Uploaded
nikos
parents:
diff changeset
67 <filter> k2n is True </filter>
31f25b37187b Uploaded
nikos
parents:
diff changeset
68 </data>
31f25b37187b Uploaded
nikos
parents:
diff changeset
69 </outputs>
31f25b37187b Uploaded
nikos
parents:
diff changeset
70
31f25b37187b Uploaded
nikos
parents:
diff changeset
71 <tests>
31f25b37187b Uploaded
nikos
parents:
diff changeset
72 <test>
31f25b37187b Uploaded
nikos
parents:
diff changeset
73 <param name="input1" value="aligned.bam" />
31f25b37187b Uploaded
nikos
parents:
diff changeset
74 <param name="input2" value="barcodes.txt" />
31f25b37187b Uploaded
nikos
parents:
diff changeset
75 <param name="k2n" value="True" />
31f25b37187b Uploaded
nikos
parents:
diff changeset
76 <param name="trimming" value="True" />
31f25b37187b Uploaded
nikos
parents:
diff changeset
77 <output name="trimming_stats" file="trimming_stats.txt" />
31f25b37187b Uploaded
nikos
parents:
diff changeset
78 <output name="unique_barcodes" file="unique_barcodes.txt" />
31f25b37187b Uploaded
nikos
parents:
diff changeset
79 <output name="read_counts" file="read_counts.txt" />
31f25b37187b Uploaded
nikos
parents:
diff changeset
80 </test>
31f25b37187b Uploaded
nikos
parents:
diff changeset
81 </tests>
31f25b37187b Uploaded
nikos
parents:
diff changeset
82
31f25b37187b Uploaded
nikos
parents:
diff changeset
83 <help>
31f25b37187b Uploaded
nikos
parents:
diff changeset
84 **What it does**
31f25b37187b Uploaded
nikos
parents:
diff changeset
85
31f25b37187b Uploaded
nikos
parents:
diff changeset
86 *Summarize Unique Barcodes* counts the number of unique random barcodes and reads associated with each sequenced fragment. A fragment is understood as 1) a pair consisting of a Reverse Transcriptase (RT) termination site and an RT priming site in paired-end sequencing, or 2) an RT termination site in single-end sequencing. For non-barcoded sequencing it only counts the reads matching each fragment.
31f25b37187b Uploaded
nikos
parents:
diff changeset
87
31f25b37187b Uploaded
nikos
parents:
diff changeset
88 ------
31f25b37187b Uploaded
nikos
parents:
diff changeset
89
31f25b37187b Uploaded
nikos
parents:
diff changeset
90 **Inputs**
31f25b37187b Uploaded
nikos
parents:
diff changeset
91
31f25b37187b Uploaded
nikos
parents:
diff changeset
92 *Summarize Unique Barcodes* requires a file containing the Aligned Reads (required) in BAM_ format and a tabular file with the Barcodes (optional) produced by the *Preprocessing* tool of the *RNA probing* suite.
31f25b37187b Uploaded
nikos
parents:
diff changeset
93
31f25b37187b Uploaded
nikos
parents:
diff changeset
94 .. _BAM: http://samtools.github.io/hts-specs/SAMv1.pdf
31f25b37187b Uploaded
nikos
parents:
diff changeset
95
31f25b37187b Uploaded
nikos
parents:
diff changeset
96 -------
31f25b37187b Uploaded
nikos
parents:
diff changeset
97
31f25b37187b Uploaded
nikos
parents:
diff changeset
98 **Parameters**
31f25b37187b Uploaded
nikos
parents:
diff changeset
99
31f25b37187b Uploaded
nikos
parents:
diff changeset
100 **Produce k2n file** - A file that contains a sequence of numbers where the n-th element informs how many unique cDNA molecules give rise to observing n unique barcodes in a given sample. Required for calculating Estimated Unique Counts (EUCs) in the *Normalize* tool.
31f25b37187b Uploaded
nikos
parents:
diff changeset
101
31f25b37187b Uploaded
nikos
parents:
diff changeset
102 **Trim untemplated nucleotides** - Untemplated nucleotides can be added to cDNA 3’ ends via the terminal transferase activity of reverse transcriptase, which offsets the location of the read-end mapping and leads to erroneous assignment of reactivity information to nucleotides upstream of those which have reacted (Schmidt and Mueller, 1999; Talkish et al., 2014). Turning this parameter on will remove those nucleotides.
31f25b37187b Uploaded
nikos
parents:
diff changeset
103
31f25b37187b Uploaded
nikos
parents:
diff changeset
104 Recommended for methods based on detecting reverse transcription termination sites (e.g. DMS-Seq, HRF-Seq or SHAPE-Seq), and not for methods based on ligating the linker directly to RNA (e.g. PARS or FragSeq).
31f25b37187b Uploaded
nikos
parents:
diff changeset
105
31f25b37187b Uploaded
nikos
parents:
diff changeset
106 **Set priming position** - Applicable when the priming site is fixed.
31f25b37187b Uploaded
nikos
parents:
diff changeset
107
31f25b37187b Uploaded
nikos
parents:
diff changeset
108 ------
31f25b37187b Uploaded
nikos
parents:
diff changeset
109
31f25b37187b Uploaded
nikos
parents:
diff changeset
110 **Outputs**
31f25b37187b Uploaded
nikos
parents:
diff changeset
111
31f25b37187b Uploaded
nikos
parents:
diff changeset
112 **Unique Barcodes** (if a Barcode file is given) is a tabular file with 4 columns.
31f25b37187b Uploaded
nikos
parents:
diff changeset
113
31f25b37187b Uploaded
nikos
parents:
diff changeset
114 ====== ==========================================================
31f25b37187b Uploaded
nikos
parents:
diff changeset
115 Column Description
31f25b37187b Uploaded
nikos
parents:
diff changeset
116 ------ ----------------------------------------------------------
31f25b37187b Uploaded
nikos
parents:
diff changeset
117 1 Transcript identifier
31f25b37187b Uploaded
nikos
parents:
diff changeset
118 2 RT termination site (start)
31f25b37187b Uploaded
nikos
parents:
diff changeset
119 3 RT priming site (end)
31f25b37187b Uploaded
nikos
parents:
diff changeset
120 4 Count of unique barcodes associated with fragments matching the first three columns
31f25b37187b Uploaded
nikos
parents:
diff changeset
121 ====== ==========================================================
31f25b37187b Uploaded
nikos
parents:
diff changeset
122
31f25b37187b Uploaded
nikos
parents:
diff changeset
123 .
31f25b37187b Uploaded
nikos
parents:
diff changeset
124
31f25b37187b Uploaded
nikos
parents:
diff changeset
125 **Read Counts** is similar to Unique Barcodes but the fourth column is a count of reads matching the first three columns.
31f25b37187b Uploaded
nikos
parents:
diff changeset
126
31f25b37187b Uploaded
nikos
parents:
diff changeset
127 **k2n file** as described above.
31f25b37187b Uploaded
nikos
parents:
diff changeset
128
31f25b37187b Uploaded
nikos
parents:
diff changeset
129 **Trimming Stats** reports statistics of trimming untemplated nucleotides from read ends.
31f25b37187b Uploaded
nikos
parents:
diff changeset
130
31f25b37187b Uploaded
nikos
parents:
diff changeset
131 </help>
31f25b37187b Uploaded
nikos
parents:
diff changeset
132
31f25b37187b Uploaded
nikos
parents:
diff changeset
133 <citations>
31f25b37187b Uploaded
nikos
parents:
diff changeset
134 <citation type="doi">10.1093/nar/gku167</citation>
31f25b37187b Uploaded
nikos
parents:
diff changeset
135 <citation type="doi">10.1093/nar/27.21.e31-i</citation>
31f25b37187b Uploaded
nikos
parents:
diff changeset
136 <citation type="doi">10.1261/rna.042218.113</citation>
31f25b37187b Uploaded
nikos
parents:
diff changeset
137 </citations>
31f25b37187b Uploaded
nikos
parents:
diff changeset
138
31f25b37187b Uploaded
nikos
parents:
diff changeset
139 </tool>