diff preprocessing.xml @ 24:431aebd93843 draft default tip

Fixed a bug in k2n.R where the function k2n_calc() would result in an error for single-end read files.
author nikos
date Wed, 05 Aug 2015 09:21:02 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preprocessing.xml	Wed Aug 05 09:21:02 2015 -0400
@@ -0,0 +1,136 @@
+<tool id="rna_probing_preprocessing" version="1.0.0" name="Preprocessing" force_history_refresh="True">
+    <description>RNA probing data</description>
+
+    <requirements>
+       <requirement type="package" version="4.1.0">gnu_awk</requirement>
+       <requirement type="set_environment">RNA_RPOBING_SCRIPT_PATH</requirement>
+    </requirements>
+
+    <command interpreter="bash">
+        preprocessing.sh
+
+        ## check if paired-end
+        #if str( $library.type ) == "paired"
+            -2 $library.input2
+        #end if
+
+        ## Inputs
+        -1 $library.input1
+
+        ## Barcode sequence
+        -b '$library.barcode_seq'
+
+        ## Trimming length
+        -t $trim
+    </command>
+
+    <inputs>
+	<!-- single/paired -->
+        <conditional name="library">
+            <param name="type" type="select" label="Is this single or paired-end sequencing?">
+                <option value="single">Single-end</option>
+                <option value="paired">Paired-end</option>
+            </param>
+            <when value="single">
+                <param format="fastqsanger" name="input1" type="data" label="FASTQ file" help="Must have Sanger-scaled quality values (fastqsanger)." />
+                <param name="barcode_seq" type="text" size="20" label="Barcode sequence" help="Reads that do not start with the signature will be removed. Use IUPAC alphabet, e.g. NNNNXRTYNN as in the randomized part of the ligation adapter." >
+    <!--		    <validator type="empty_field" message="Specify the Barcode sequence" /> -->
+                </param>
+            </when>
+            <when value="paired">
+                <param format="fastqsanger" name="input1" type="data" label="FASTQ file (read 1)" help="Must have Sanger-scaled quality values (fastqsanger)." />
+                <param format="fastqsanger" name="input2" type="data" label="FASTQ file (read 2)" help="Must have Sanger-scaled quality values (fastqsanger)." />
+                <param name="barcode_seq" type="text" size="20" label="Barcode sequence" help="Reads that do not start with the signature will be removed. Use IUPAC alphabet, e.g. NNNNXRTYNN as in the randomized part of the ligation adapter." >
+                    <!-- <validator type="empty_field" message="Specify the Barcode sequence" /> -->
+                </param>
+            </when>
+        </conditional>
+        <param name="trim" type="integer" min="0" optional="true" value="15" label="3' trimming length" help="Number of random bases for random priming, will be removed as they are likely to differ from a template." />
+
+    </inputs>
+
+    <outputs>
+        <data format="fastqsanger" name="output1" label="${tool.name} on ${on_string}: Read 1" from_work_dir="output_dir/read1.fastq" />
+        <data format="fastqsanger" name="output2" label="${tool.name} on ${on_string}: Read 2" from_work_dir="output_dir/read2.fastq" >
+            <filter> library['type'] == "paired"</filter>
+        </data>
+        <data format="tabular" name="barcodes" label="${tool.name} on ${on_string}: Barcodes" from_work_dir="output_dir/barcodes.txt">
+            <!-- <filter> library['barcode_seq'] != '' </filter> -->
+        </data>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="input1" value="reads1.fastq"/>
+            <param name="input2" value="reads2.fastq"/>
+            <param name="barcode_seq" value="NNNNNNN"/>
+            <param name="trim" value="15"/>
+            <output name="output1" file="reads1_preprocessed.fastq"/>
+            <output name="output2" file="reads2_preprocessed.fastq"/>
+            <output name="barcodes" file="barcodes.txt"/>
+        </test>
+    </tests>
+
+    <help>
+**What it does**
+
+*Preprocessing* tool removes and saves the random barcodes sequences, if they were ligated to 3’ ends of cDNA, in a separate dataset to be used in downstream analysis. Additionally to debarcoding, it trims 1) the 5’ end of the second-in-pair reads to remove the reverse transcription primer derived sequence and 2) 3’ end of both reads to remove possible random barcode incorporation in the second-in-pair read and random primer in first-in-pair read.
+
+------
+
+**Examples**
+
+Sample input files (quality scores omited)::
+
+  * Read1
+    @DJG83KN1:255:C3U57ACXX:3:1101:1215:2296 1:N:0:ATCACG
+    TTCGCACAACATNATGGAGGCTTCACGGTACAGAACGAGGCCAGCAAATACCAAGTCTCAGTGAACAAATACAAAGGGACGGCTGGCAACGCCCTCAT
+    @DJG83KN1:255:C3U57ACXX:3:1101:1142:2461 1:N:0:ATCACG
+    ACCCCGCATCAAATTGGGAACTACTTCCAGCAGTTGTTAGACTTGGGCTCTGGCAGCCCCTTGGAGTGGAGGGACTTGCAGCCCTTCTTATCAGGTCT
+
+  * Read2
+    @DJG83KN1:255:C3U57ACXX:3:1101:1215:2296 2:N:0:ATCACG
+    CACAAATCTGCCGTTTGGATTGGCTGCATGGCATCTGTTATACCACCAGCCACCACCATCTTCTTTGGAGCACTGTTTTCTTGGATCCGTAGTTACCC
+    @DJG83KN1:255:C3U57ACXX:3:1101:1142:2461 2:N:0:ATCACG
+    GTTGGGGGTGTGGGGAAAAAAATAAAAATCGTGAGAAGTTTTAAGACTATGTCACAAAAATGGCTTTAATTATACCATCAAACAGAAACCACCAATTG
+
+Run 1 - Barcode Sequence = '', Trimming length = 10::
+
+  * Read1
+    @DJG83KN1:255:C3U57ACXX:3:1101:1215:2296
+    TTCGCACAACATNATGGAGGCTTCACGGTACAGAACGAGGCCAGCAAATACCAAGTCTCAGTGAACAAATACAAAGGGACGGCTGGCA
+    @DJG83KN1:255:C3U57ACXX:3:1101:1142:2461
+    ACCCCGCATCAAATTGGGAACTACTTCCAGCAGTTGTTAGACTTGGGCTCTGGCAGCCCCTTGGAGTGGAGGGACTTGCAGCCCTTCT
+
+  * Read2
+    @DJG83KN1:255:C3U57ACXX:3:1101:1215:2296
+    CCGTTTGGATTGGCTGCATGGCATCTGTTATACCACCAGCCACCACCATCTTCTTTGGAGCACTGTTTTCTTGGATCCGTAGTTACCC
+    @DJG83KN1:255:C3U57ACXX:3:1101:1142:2461
+    TGGGGAAAAAAATAAAAATCGTGAGAAGTTTTAAGACTATGTCACAAAAATGGCTTTAATTATACCATCAAACAGAAACCACCAATTG
+
+Run 2 - Barcode Sequence = 'NNNNNNN', Trimming length = 10::
+
+  * Read1
+    @DJG83KN1:255:C3U57ACXX:3:1101:1215:2296
+    AACATNATGGAGGCTTCACGGTACAGAACGAGGCCAGCAAATACCAAGTCTCAGTGAACAAATACAAAGGGACGGCTGGCA
+    @DJG83KN1:255:C3U57ACXX:3:1101:1142:2461
+    ATCAAATTGGGAACTACTTCCAGCAGTTGTTAGACTTGGGCTCTGGCAGCCCCTTGGAGTGGAGGGACTTGCAGCCCTTCT
+
+  * Read2
+    @DJG83KN1:255:C3U57ACXX:3:1101:1215:2296
+    CCGTTTGGATTGGCTGCATGGCATCTGTTATACCACCAGCCACCACCATCTTCTTTGGAGCACTGTTTTCTTGGATCCGTA
+    @DJG83KN1:255:C3U57ACXX:3:1101:1142:2461
+    TGGGGAAAAAAATAAAAATCGTGAGAAGTTTTAAGACTATGTCACAAAAATGGCTTTAATTATACCATCAAACAGAAACCA
+
+  * Barcodes
+
+    @DJG83KN1:255:C3U57ACXX:3:1101:1215:2296        TTCGCAC
+    @DJG83KN1:255:C3U57ACXX:3:1101:1142:2461        ACCCCGC
+
+    </help>
+
+    <citations>
+        <citation type="doi">10.1093/nar/gku167</citation>
+    </citations>
+
+</tool>