Mercurial > repos > devteam > split_paired_reads
changeset 0:e7d1ffdbb9b6 draft default tip
Uploaded
author | devteam |
---|---|
date | Wed, 21 Aug 2013 13:20:57 -0400 |
parents | |
children | |
files | split_paired_reads.py split_paired_reads.xml test-data/3.fastqsanger test-data/split_pair_reads_1.fastqsanger test-data/split_pair_reads_2.fastqsanger |
diffstat | 5 files changed, 171 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python

"""
Split fixed length paired end reads.

Usage: split_paired_reads.py <input_fastq> <output_end1> <output_end2>

The input is read as four-line records.  The first and third line of every
record are copied to both outputs with '/1' (first output) or '/2' (second
output) appended.  The second and fourth line are cut in the middle: the
first half goes to the first output, the second half to the second output.
Blank lines are skipped and do not count towards the four-line record.
"""

import os
import sys

LINES_PER_RECORD = 4


def split_line(line, line_index):
    """Return the ``(end1, end2)`` pair for one non-empty input line.

    line       -- the line with trailing whitespace already removed
    line_index -- position of the line inside its record (0 to 3)

    Lines 0 and 2 of a record are kept whole and tagged with '/1' and '/2'.
    Lines 1 and 3 are halved; for an odd length the second half is one
    character longer.
    """
    if line_index in (0, 2):
        return line + '/1', line + '/2'
    # Floor division keeps the slice index an int on Python 3 as well.
    half = len(line) // 2
    return line[:half], line[half:]


def split_paired_reads(infile, outfile_end1, outfile_end2):
    """Split the records of ``infile`` into the two output files.

    infile       -- path of the file holding both ends in each read
    outfile_end1 -- path that receives the first half of every read
    outfile_end2 -- path that receives the second half of every read

    Returns the number of non-blank lines processed, so the caller can
    check that it is a multiple of four.
    """
    count = 0
    # The context managers close all three files even when a read or a
    # write fails part-way through.
    with open(infile) as reads, \
            open(outfile_end1, 'w') as out1, \
            open(outfile_end2, 'w') as out2:
        for line in reads:
            line = line.rstrip()
            if not line:
                continue
            end1, end2 = split_line(line, count % LINES_PER_RECORD)
            out1.write('%s\n' % end1)
            out2.write('%s\n' % end2)
            count += 1
    return count


def main(argv=None):
    """Command line entry point; returns the process exit status.

    argv -- argument list including the program name; defaults to sys.argv
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 4:
        sys.stderr.write("Usage: split_paired_reads.py <input_fastq> <output_end1> <output_end2>\n")
        return 1

    count = split_paired_reads(argv[1], argv[2], argv[3])

    # An incomplete trailing record is reported but is not treated as fatal.
    if count % LINES_PER_RECORD != 0:
        sys.stderr.write("WARNING: Number of lines in the input file was not divisible by 4.\nCheck consistency of the input fastq file.\n")
    return 0


if __name__ == '__main__':
    sys.exit(main())
<!-- Tool definition that runs split_paired_reads.py on one fastqsanger
     dataset and produces two fastqsanger datasets, one per read end. -->
<tool id="split_paired_reads" name="Split paired end reads" version="1.0.0">
  <description></description>
  <!-- Each path is single-quoted so that a path containing whitespace or
       shell metacharacters still reaches the script as one argument. -->
  <command interpreter="python">
    split_paired_reads.py '$input' '$output1' '$output2'
  </command>
  <inputs>
    <param name="input" type="data" format="fastqsanger" label="Your paired-end file" />
  </inputs>
  <!-- output1 receives the first half of every read, output2 the second. -->
  <outputs>
    <data name="output1" format="fastqsanger"/>
    <data name="output2" format="fastqsanger"/>
  </outputs>
  <tests>
    <test>
      <param name="input" value="3.fastqsanger" ftype="fastqsanger"/>
      <output name="output1" file="split_pair_reads_1.fastqsanger" ftype="fastqsanger"/>
      <output name="output2" file="split_pair_reads_2.fastqsanger" ftype="fastqsanger"/>
    </test>
  </tests>
<help>

**What it does**

Splits a single fastq dataset representing paired-end run into two datasets (one for each end). This tool works only for datasets where both ends have **the same** length.

-----

**Input formats**

A multiple-fastq file, for example::

    @HWI-EAS91_1_30788AAXX:7:21:1542:1758
    GTCAATTGTACTGGTCAATACTAAAAGAATAGGATCGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
    +HWI-EAS91_1_30788AAXX:7:21:1542:1758
    hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR


-----

**Outputs**

One end::

    @HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
    GTCAATTGTACTGGTCAATACTAAAAGAATAGGATC
    +HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
    hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh

The other end::

    @HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
    GCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
    +HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
    hhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR

</help>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/3.fastqsanger Wed Aug 21 13:20:57 2013 -0400 @@ -0,0 +1,20 @@ +@HWI-EAS91_1_30788AAXX:7:21:1542:1758 +GTCAATTGTACTGGTCAATACTAAAAGAATAGGATCGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA ++HWI-EAS91_1_30788AAXX:7:21:1542:1758 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR +@HWI-EAS91_1_30788AAXX:7:22:1621:462 +ATAATGGCTATTATTGTGGGGGGGATGATGCTGGAAACTAGCCCCAATATCAATCCTATATCAAATCTCACC ++HWI-EAS91_1_30788AAXX:7:22:1621:462 +hhhhhhhhhhhhQAhh@hhhhNhhhfhMbCIScC?hhJhhhhChhhJhhhRhhKhePhc\KhhV\KhXhJhh +@HWI-EAS91_1_30788AAXX:7:45:408:807 +TACCCGATTTTTTGCTTTCCACTTTATCCTACCCTTATGAGTGCTAGGATCAGGATGGAGAGGATTAGGGCT ++HWI-EAS91_1_30788AAXX:7:45:408:807 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hhhZh`hhhhhRXhhYh +@HWI-EAS91_1_30788AAXX:7:49:654:1439 +CTAACTCTATTTATTGTATTTCAACTAAAAATCTCATAGGTTTATTGATAGTTGTGTTGTTGGTGTAAATGG ++HWI-EAS91_1_30788AAXX:7:49:654:1439 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhdhh_hG\XhU@ +@HWI-EAS91_1_30788AAXX:7:64:947:234 +TATCAAAAAAGAATATAATCTGAATCAACACTACAACCTATTAGTGTGTAGAATAGGAAGTAGAGGCCTGCG ++HWI-EAS91_1_30788AAXX:7:64:947:234 +hhhhhhhhhhhhhhhhhhhhhhhRhhehhahhhhhJhhhhhhhh^hPhWfhhhhThWUhhfhh_hhNIVPUd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/split_pair_reads_1.fastqsanger Wed Aug 21 13:20:57 2013 -0400 @@ -0,0 +1,20 @@ +@HWI-EAS91_1_30788AAXX:7:21:1542:1758/1 +GTCAATTGTACTGGTCAATACTAAAAGAATAGGATC ++HWI-EAS91_1_30788AAXX:7:21:1542:1758/1 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +@HWI-EAS91_1_30788AAXX:7:22:1621:462/1 +ATAATGGCTATTATTGTGGGGGGGATGATGCTGGAA ++HWI-EAS91_1_30788AAXX:7:22:1621:462/1 +hhhhhhhhhhhhQAhh@hhhhNhhhfhMbCIScC?h +@HWI-EAS91_1_30788AAXX:7:45:408:807/1 +TACCCGATTTTTTGCTTTCCACTTTATCCTACCCTT ++HWI-EAS91_1_30788AAXX:7:45:408:807/1 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +@HWI-EAS91_1_30788AAXX:7:49:654:1439/1 +CTAACTCTATTTATTGTATTTCAACTAAAAATCTCA ++HWI-EAS91_1_30788AAXX:7:49:654:1439/1 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +@HWI-EAS91_1_30788AAXX:7:64:947:234/1 +TATCAAAAAAGAATATAATCTGAATCAACACTACAA ++HWI-EAS91_1_30788AAXX:7:64:947:234/1 +hhhhhhhhhhhhhhhhhhhhhhhRhhehhahhhhhJ
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/split_pair_reads_2.fastqsanger Wed Aug 21 13:20:57 2013 -0400 @@ -0,0 +1,20 @@ +@HWI-EAS91_1_30788AAXX:7:21:1542:1758/2 +GCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA ++HWI-EAS91_1_30788AAXX:7:21:1542:1758/2 +hhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR +@HWI-EAS91_1_30788AAXX:7:22:1621:462/2 +ACTAGCCCCAATATCAATCCTATATCAAATCTCACC ++HWI-EAS91_1_30788AAXX:7:22:1621:462/2 +hJhhhhChhhJhhhRhhKhePhc\KhhV\KhXhJhh +@HWI-EAS91_1_30788AAXX:7:45:408:807/2 +ATGAGTGCTAGGATCAGGATGGAGAGGATTAGGGCT ++HWI-EAS91_1_30788AAXX:7:45:408:807/2 +hhhhhhhhhhhhhhhhhh`hhhZh`hhhhhRXhhYh +@HWI-EAS91_1_30788AAXX:7:49:654:1439/2 +TAGGTTTATTGATAGTTGTGTTGTTGGTGTAAATGG ++HWI-EAS91_1_30788AAXX:7:49:654:1439/2 +hhhhhhhhhhhhhhhhhhhhhhhhhdhh_hG\XhU@ +@HWI-EAS91_1_30788AAXX:7:64:947:234/2 +CCTATTAGTGTGTAGAATAGGAAGTAGAGGCCTGCG ++HWI-EAS91_1_30788AAXX:7:64:947:234/2 +hhhhhhhh^hPhWfhhhhThWUhhfhh_hhNIVPUd