Mercurial > repos > nikos > rna_probing
diff preprocessing.xml @ 24:431aebd93843 draft default tip
Fixed a bug in k2n.R where the function k2n_calc() would result in an error for single-end read files.
author | nikos |
---|---|
date | Wed, 05 Aug 2015 09:21:02 -0400 |
parents | |
children |
line wrap: on
line diff
<!--
  Galaxy tool wrapper for preprocessing.sh (RNA probing data).

  Inputs : one FASTQ file (single-end) or two FASTQ files (paired-end),
           a barcode signature and a 3' trimming length.
  Outputs: the preprocessed read file(s), collected from output_dir/, and a
           tabular dataset with the barcode removed from each read.
-->
<tool id="rna_probing_preprocessing" version="1.0.0" name="Preprocessing" force_history_refresh="True">
    <description>RNA probing data</description>

    <requirements>
        <requirement type="package" version="4.1.0">gnu_awk</requirement>
        <!-- NOTE(review): "RPOBING" looks like a misspelling of "PROBING". The name
             has to match the variable defined by the tool dependency definition,
             which is not visible here, so it is left unchanged. Confirm before renaming. -->
        <requirement type="set_environment">RNA_RPOBING_SCRIPT_PATH</requirement>
    </requirements>

    <command interpreter="bash">
        preprocessing.sh

        ## Second-in-pair reads are only passed for paired-end libraries
        #if str( $library.type ) == "paired"
            -2 '$library.input2'
        #end if

        ## First-in-pair (or single-end) reads
        -1 '$library.input1'

        ## Barcode sequence
        -b '$library.barcode_seq'

        ## Trimming length is optional: when the field is left blank the flag is
        ## omitted, otherwise "-t" would be emitted without an argument.
        #if str( $trim ).strip() not in ( "", "None" )
            -t $trim
        #end if
    </command>

    <inputs>
        <!-- single/paired: selects which FASTQ inputs are shown -->
        <conditional name="library">
            <param name="type" type="select" label="Is this single or paired-end sequencing?">
                <option value="single">Single-end</option>
                <option value="paired">Paired-end</option>
            </param>
            <when value="single">
                <param format="fastqsanger" name="input1" type="data" label="FASTQ file" help="Must have Sanger-scaled quality values (fastqsanger)." />
                <param name="barcode_seq" type="text" size="20" label="Barcode sequence" help="Reads that do not start with the signature will be removed. Use IUPAC alphabet, e.g. NNNNXRTYNN as in the randomized part of the ligation adapter." >
                    <!-- <validator type="empty_field" message="Specify the Barcode sequence" /> -->
                </param>
            </when>
            <when value="paired">
                <param format="fastqsanger" name="input1" type="data" label="FASTQ file (read 1)" help="Must have Sanger-scaled quality values (fastqsanger)." />
                <param format="fastqsanger" name="input2" type="data" label="FASTQ file (read 2)" help="Must have Sanger-scaled quality values (fastqsanger)." />
                <param name="barcode_seq" type="text" size="20" label="Barcode sequence" help="Reads that do not start with the signature will be removed. Use IUPAC alphabet, e.g. NNNNXRTYNN as in the randomized part of the ligation adapter." >
                    <!-- <validator type="empty_field" message="Specify the Barcode sequence" /> -->
                </param>
            </when>
        </conditional>
        <param name="trim" type="integer" min="0" optional="true" value="15" label="3' trimming length" help="Number of random bases for random priming, will be removed as they are likely to differ from a template." />

    </inputs>

    <outputs>
        <data format="fastqsanger" name="output1" label="${tool.name} on ${on_string}: Read 1" from_work_dir="output_dir/read1.fastq" />
        <!-- Read 2 is only produced for paired-end libraries -->
        <data format="fastqsanger" name="output2" label="${tool.name} on ${on_string}: Read 2" from_work_dir="output_dir/read2.fastq" >
            <filter> library['type'] == "paired"</filter>
        </data>
        <data format="tabular" name="barcodes" label="${tool.name} on ${on_string}: Barcodes" from_work_dir="output_dir/barcodes.txt">
            <!-- <filter> library['barcode_seq'] != '' </filter> -->
        </data>
    </outputs>

    <tests>
        <test>
            <!-- The conditional defaults to "single"; the paired branch must be
                 selected explicitly because this test supplies input2 and expects output2. -->
            <param name="type" value="paired"/>
            <param name="input1" value="reads1.fastq"/>
            <param name="input2" value="reads2.fastq"/>
            <param name="barcode_seq" value="NNNNNNN"/>
            <param name="trim" value="15"/>
            <output name="output1" file="reads1_preprocessed.fastq"/>
            <output name="output2" file="reads2_preprocessed.fastq"/>
            <output name="barcodes" file="barcodes.txt"/>
        </test>
    </tests>

    <help>
**What it does**

*Preprocessing* tool removes and saves the random barcode sequences, if they were ligated to 3’ ends of cDNA, in a separate dataset to be used in downstream analysis. Additionally to debarcoding, it trims 1) the 5’ end of the second-in-pair reads to remove the reverse transcription primer derived sequence and 2) 3’ end of both reads to remove possible random barcode incorporation in the second-in-pair read and random primer in first-in-pair read.

------

**Examples**

Sample input files (quality scores omitted)::

    * Read1
    @DJG83KN1:255:C3U57ACXX:3:1101:1215:2296 1:N:0:ATCACG
    TTCGCACAACATNATGGAGGCTTCACGGTACAGAACGAGGCCAGCAAATACCAAGTCTCAGTGAACAAATACAAAGGGACGGCTGGCAACGCCCTCAT
    @DJG83KN1:255:C3U57ACXX:3:1101:1142:2461 1:N:0:ATCACG
    ACCCCGCATCAAATTGGGAACTACTTCCAGCAGTTGTTAGACTTGGGCTCTGGCAGCCCCTTGGAGTGGAGGGACTTGCAGCCCTTCTTATCAGGTCT

    * Read2
    @DJG83KN1:255:C3U57ACXX:3:1101:1215:2296 2:N:0:ATCACG
    CACAAATCTGCCGTTTGGATTGGCTGCATGGCATCTGTTATACCACCAGCCACCACCATCTTCTTTGGAGCACTGTTTTCTTGGATCCGTAGTTACCC
    @DJG83KN1:255:C3U57ACXX:3:1101:1142:2461 2:N:0:ATCACG
    GTTGGGGGTGTGGGGAAAAAAATAAAAATCGTGAGAAGTTTTAAGACTATGTCACAAAAATGGCTTTAATTATACCATCAAACAGAAACCACCAATTG

Run 1 - Barcode Sequence = '', Trimming length = 10::

    * Read1
    @DJG83KN1:255:C3U57ACXX:3:1101:1215:2296
    TTCGCACAACATNATGGAGGCTTCACGGTACAGAACGAGGCCAGCAAATACCAAGTCTCAGTGAACAAATACAAAGGGACGGCTGGCA
    @DJG83KN1:255:C3U57ACXX:3:1101:1142:2461
    ACCCCGCATCAAATTGGGAACTACTTCCAGCAGTTGTTAGACTTGGGCTCTGGCAGCCCCTTGGAGTGGAGGGACTTGCAGCCCTTCT

    * Read2
    @DJG83KN1:255:C3U57ACXX:3:1101:1215:2296
    CCGTTTGGATTGGCTGCATGGCATCTGTTATACCACCAGCCACCACCATCTTCTTTGGAGCACTGTTTTCTTGGATCCGTAGTTACCC
    @DJG83KN1:255:C3U57ACXX:3:1101:1142:2461
    TGGGGAAAAAAATAAAAATCGTGAGAAGTTTTAAGACTATGTCACAAAAATGGCTTTAATTATACCATCAAACAGAAACCACCAATTG

Run 2 - Barcode Sequence = 'NNNNNNN', Trimming length = 10::

    * Read1
    @DJG83KN1:255:C3U57ACXX:3:1101:1215:2296
    AACATNATGGAGGCTTCACGGTACAGAACGAGGCCAGCAAATACCAAGTCTCAGTGAACAAATACAAAGGGACGGCTGGCA
    @DJG83KN1:255:C3U57ACXX:3:1101:1142:2461
    ATCAAATTGGGAACTACTTCCAGCAGTTGTTAGACTTGGGCTCTGGCAGCCCCTTGGAGTGGAGGGACTTGCAGCCCTTCT

    * Read2
    @DJG83KN1:255:C3U57ACXX:3:1101:1215:2296
    CCGTTTGGATTGGCTGCATGGCATCTGTTATACCACCAGCCACCACCATCTTCTTTGGAGCACTGTTTTCTTGGATCCGTA
    @DJG83KN1:255:C3U57ACXX:3:1101:1142:2461
    TGGGGAAAAAAATAAAAATCGTGAGAAGTTTTAAGACTATGTCACAAAAATGGCTTTAATTATACCATCAAACAGAAACCA

    * Barcodes

    @DJG83KN1:255:C3U57ACXX:3:1101:1215:2296 TTCGCAC
    @DJG83KN1:255:C3U57ACXX:3:1101:1142:2461 ACCCCGC

    </help>

    <citations>
        <citation type="doi">10.1093/nar/gku167</citation>
    </citations>

</tool>