|
10
|
<tool id="rna_probing_preprocessing" version="1.0.0" name="Preprocessing" force_history_refresh="True">
    <description>RNA probing data</description>

    <!--
        Runtime dependencies of preprocessing.sh.
        NOTE(review): "RPOBING" in the environment variable name looks like a
        typo for "PROBING". It is kept verbatim because it has to match the name
        exported by the dependency definition, which is not visible from this
        file; rename both together or not at all.
    -->
    <requirements>
        <requirement type="package" version="4.1.0">gnu_awk</requirement>
        <requirement type="set_environment">RNA_RPOBING_SCRIPT_PATH</requirement>
    </requirements>

    <command interpreter="bash">
        preprocessing.sh

        ## Second-in-pair reads are passed only for paired-end libraries
        #if str( $library.type ) == "paired"
            -2 '$library.input2'
        #end if

        ## Inputs
        -1 '$library.input1'

        ## Barcode sequence (may be empty, in which case no barcodes dataset is produced)
        -b '$library.barcode_seq'

        ## Trimming length: the parameter is optional, so the flag is emitted only
        ## when a value was supplied; a bare flag without an argument is never produced.
        ## NOTE(review): assumes preprocessing.sh copes with a missing trimming flag; confirm.
        #if str( $trim ) not in ( "", "None" )
            -t $trim
        #end if
    </command>

    <inputs>
        <!-- single/paired: both branches expose the same barcode parameter so that
             it is always reachable as library.barcode_seq -->
        <conditional name="library">
            <param name="type" type="select" label="Is this single or paired-end sequencing?">
                <option value="single">Single-end</option>
                <option value="paired">Paired-end</option>
            </param>
            <when value="single">
                <param format="fastqsanger" name="input1" type="data" label="FASTQ file" help="Must have Sanger-scaled quality values (fastqsanger)." />
                <!-- No empty_field validator: an empty barcode is a supported mode
                     (see the filter on the "barcodes" output). -->
                <param name="barcode_seq" type="text" size="20" label="Barcode sequence" help="Reads that do not start with the signature will be removed. Use IUPAC alphabet, e.g. NNNNXRTYNN as in the randomized part of the ligation adapter." />
            </when>
            <when value="paired">
                <param format="fastqsanger" name="input1" type="data" label="FASTQ file (read 1)" help="Must have Sanger-scaled quality values (fastqsanger)." />
                <param format="fastqsanger" name="input2" type="data" label="FASTQ file (read 2)" help="Must have Sanger-scaled quality values (fastqsanger)." />
                <!-- No empty_field validator: an empty barcode is a supported mode
                     (see the filter on the "barcodes" output). -->
                <param name="barcode_seq" type="text" size="20" label="Barcode sequence" help="Reads that do not start with the signature will be removed. Use IUPAC alphabet, e.g. NNNNXRTYNN as in the randomized part of the ligation adapter." />
            </when>
        </conditional>
        <param name="trim" type="integer" min="0" optional="true" value="15" label="3' trimming length" help="Number of random bases for random priming, will be removed as they are likely to differ from a template." />
    </inputs>

    <outputs>
        <data format="fastqsanger" name="output1" label="${tool.name} on ${on_string}: Read 1" from_work_dir="output_dir/read1.fastq" />
        <!-- Read 2 exists only for paired-end libraries -->
        <data format="fastqsanger" name="output2" label="${tool.name} on ${on_string}: Read 2" from_work_dir="output_dir/read2.fastq">
            <filter>library['type'] == "paired"</filter>
        </data>
        <!-- Barcodes are collected only when a barcode signature was given -->
        <data format="tabular" name="barcodes" label="${tool.name} on ${on_string}: Barcodes" from_work_dir="output_dir/barcodes.txt">
            <filter>library['barcode_seq'] != ''</filter>
        </data>
    </outputs>

    <tests>
        <test>
            <!-- The conditional defaults to "single"; paired-end mode has to be
                 selected explicitly, otherwise input2 is ignored and output2 is
                 filtered out. -->
            <param name="type" value="paired"/>
            <param name="input1" value="reads1.fastq"/>
            <param name="input2" value="reads2.fastq"/>
            <param name="barcode_seq" value="NNNNNNN"/>
            <param name="trim" value="15"/>
            <output name="output1" file="reads1_preprocessed.fastq"/>
            <output name="output2" file="reads2_preprocessed.fastq"/>
            <output name="barcodes" file="barcodes.txt"/>
        </test>
    </tests>

    <help>
**What it does**

The *Preprocessing* tool removes the random barcode sequences (if they were ligated to the 3’ ends of cDNA) and saves them in a separate dataset to be used in downstream analysis. In addition to debarcoding, it trims 1) the 5’ end of the second-in-pair reads to remove the reverse transcription primer-derived sequence and 2) the 3’ end of both reads to remove possible random barcode incorporation in the second-in-pair read and the random primer in the first-in-pair read.

------

**Examples**

Sample input files (quality scores omitted)::

    * Read1
    @DJG83KN1:255:C3U57ACXX:3:1101:1215:2296 1:N:0:ATCACG
    TTCGCACAACATNATGGAGGCTTCACGGTACAGAACGAGGCCAGCAAATACCAAGTCTCAGTGAACAAATACAAAGGGACGGCTGGCAACGCCCTCAT
    @DJG83KN1:255:C3U57ACXX:3:1101:1142:2461 1:N:0:ATCACG
    ACCCCGCATCAAATTGGGAACTACTTCCAGCAGTTGTTAGACTTGGGCTCTGGCAGCCCCTTGGAGTGGAGGGACTTGCAGCCCTTCTTATCAGGTCT

    * Read2
    @DJG83KN1:255:C3U57ACXX:3:1101:1215:2296 2:N:0:ATCACG
    CACAAATCTGCCGTTTGGATTGGCTGCATGGCATCTGTTATACCACCAGCCACCACCATCTTCTTTGGAGCACTGTTTTCTTGGATCCGTAGTTACCC
    @DJG83KN1:255:C3U57ACXX:3:1101:1142:2461 2:N:0:ATCACG
    GTTGGGGGTGTGGGGAAAAAAATAAAAATCGTGAGAAGTTTTAAGACTATGTCACAAAAATGGCTTTAATTATACCATCAAACAGAAACCACCAATTG

Run 1 - Barcode Sequence = '', Trimming length = 10::

    * Read1
    @DJG83KN1:255:C3U57ACXX:3:1101:1215:2296
    TTCGCACAACATNATGGAGGCTTCACGGTACAGAACGAGGCCAGCAAATACCAAGTCTCAGTGAACAAATACAAAGGGACGGCTGGCA
    @DJG83KN1:255:C3U57ACXX:3:1101:1142:2461
    ACCCCGCATCAAATTGGGAACTACTTCCAGCAGTTGTTAGACTTGGGCTCTGGCAGCCCCTTGGAGTGGAGGGACTTGCAGCCCTTCT

    * Read2
    @DJG83KN1:255:C3U57ACXX:3:1101:1215:2296
    CCGTTTGGATTGGCTGCATGGCATCTGTTATACCACCAGCCACCACCATCTTCTTTGGAGCACTGTTTTCTTGGATCCGTAGTTACCC
    @DJG83KN1:255:C3U57ACXX:3:1101:1142:2461
    TGGGGAAAAAAATAAAAATCGTGAGAAGTTTTAAGACTATGTCACAAAAATGGCTTTAATTATACCATCAAACAGAAACCACCAATTG

Run 2 - Barcode Sequence = 'NNNNNNN', Trimming length = 10::

    * Read1
    @DJG83KN1:255:C3U57ACXX:3:1101:1215:2296
    AACATNATGGAGGCTTCACGGTACAGAACGAGGCCAGCAAATACCAAGTCTCAGTGAACAAATACAAAGGGACGGCTGGCA
    @DJG83KN1:255:C3U57ACXX:3:1101:1142:2461
    ATCAAATTGGGAACTACTTCCAGCAGTTGTTAGACTTGGGCTCTGGCAGCCCCTTGGAGTGGAGGGACTTGCAGCCCTTCT

    * Read2
    @DJG83KN1:255:C3U57ACXX:3:1101:1215:2296
    CCGTTTGGATTGGCTGCATGGCATCTGTTATACCACCAGCCACCACCATCTTCTTTGGAGCACTGTTTTCTTGGATCCGTA
    @DJG83KN1:255:C3U57ACXX:3:1101:1142:2461
    TGGGGAAAAAAATAAAAATCGTGAGAAGTTTTAAGACTATGTCACAAAAATGGCTTTAATTATACCATCAAACAGAAACCA

    * Barcodes

    @DJG83KN1:255:C3U57ACXX:3:1101:1215:2296 TTCGCAC
    @DJG83KN1:255:C3U57ACXX:3:1101:1142:2461 ACCCCGC

    </help>

    <citations>
        <citation type="doi">10.1093/nar/gku167</citation>
    </citations>

</tool>
|