changeset 0:e7d1ffdbb9b6 draft default tip

Uploaded
author devteam
date Wed, 21 Aug 2013 13:20:57 -0400
parents
children
files split_paired_reads.py split_paired_reads.xml test-data/3.fastqsanger test-data/split_pair_reads_1.fastqsanger test-data/split_pair_reads_2.fastqsanger
diffstat 5 files changed, 171 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/split_paired_reads.py	Wed Aug 21 13:20:57 2013 -0400
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+
+"""
+Split fixed length paired end reads
+"""
+
+import os, sys
+
+if __name__ == '__main__':
+    
+    infile = sys.argv[1]
+    outfile_end1 = open(sys.argv[2], 'w')
+    outfile_end2 = open(sys.argv[3], 'w')
+    
+    i = 0
+    
+    for line in file( infile ):
+        line = line.rstrip()
+        
+        if not line:
+            continue 
+        
+        end1 = ''
+        end2 = ''
+        
+        line_index = i % 4
+        
+        if line_index == 0:
+            end1 = line + '/1'
+            end2 = line + '/2'
+        
+        elif line_index == 1:
+            seq_len = len(line)/2
+            end1 = line[0:seq_len]
+            end2 = line[seq_len:]
+        
+        elif line_index == 2:
+            end1 = line + '/1'
+            end2 = line + '/2'
+        
+        else:
+            qual_len = len(line)/2
+            end1 = line[0:qual_len]
+            end2 = line[qual_len:]
+            
+        outfile_end1.write('%s\n' %(end1))
+        outfile_end2.write('%s\n' %(end2))
+        
+        i += 1
+        
+    if  i % 4 != 0  :
+        sys.stderr.write("WARNING: Number of lines in the input file was not divisible by 4.\nCheck consistency of the input fastq file.\n")
+    outfile_end1.close()
+    outfile_end2.close()    
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/split_paired_reads.xml	Wed Aug 21 13:20:57 2013 -0400
@@ -0,0 +1,57 @@
+<tool id="split_paired_reads" name="Split paired end reads" version="1.0.0">
+  <description></description>
+  <command interpreter="python">
+    split_paired_reads.py $input $output1 $output2
+  </command>
+    <inputs>
+        <param name="input" type="data" format="fastqsanger" label="Your paired-end file" />
+    </inputs>
+    <outputs>
+        <data name="output1" format="fastqsanger"/>
+        <data name="output2" format="fastqsanger"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="3.fastqsanger" ftype="fastqsanger"/>
+            <output name="output1" file="split_pair_reads_1.fastqsanger" ftype="fastqsanger"/>
+            <output name="output2" file="split_pair_reads_2.fastqsanger" ftype="fastqsanger"/>
+        </test>
+    </tests>
+<help>
+        
+**What it does**
+ 
+Splits a single fastq dataset representing a paired-end run into two datasets (one for each end). This tool works only for datasets where both ends have **the same** length.
+
+-----
+
+**Input formats**
+
+A multiple-fastq file, for example::
+
+    @HWI-EAS91_1_30788AAXX:7:21:1542:1758
+    GTCAATTGTACTGGTCAATACTAAAAGAATAGGATCGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
+    +HWI-EAS91_1_30788AAXX:7:21:1542:1758
+    hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR
+
+
+-----
+
+**Outputs**
+
+One end::
+
+    @HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
+    GTCAATTGTACTGGTCAATACTAAAAGAATAGGATC
+    +HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
+    hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+
+The other end::
+
+    @HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
+    GCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
+    +HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
+    hhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR
+    
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/3.fastqsanger	Wed Aug 21 13:20:57 2013 -0400
@@ -0,0 +1,20 @@
+@HWI-EAS91_1_30788AAXX:7:21:1542:1758
+GTCAATTGTACTGGTCAATACTAAAAGAATAGGATCGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
++HWI-EAS91_1_30788AAXX:7:21:1542:1758
+hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR
+@HWI-EAS91_1_30788AAXX:7:22:1621:462
+ATAATGGCTATTATTGTGGGGGGGATGATGCTGGAAACTAGCCCCAATATCAATCCTATATCAAATCTCACC
++HWI-EAS91_1_30788AAXX:7:22:1621:462
+hhhhhhhhhhhhQAhh@hhhhNhhhfhMbCIScC?hhJhhhhChhhJhhhRhhKhePhc\KhhV\KhXhJhh
+@HWI-EAS91_1_30788AAXX:7:45:408:807
+TACCCGATTTTTTGCTTTCCACTTTATCCTACCCTTATGAGTGCTAGGATCAGGATGGAGAGGATTAGGGCT
++HWI-EAS91_1_30788AAXX:7:45:408:807
+hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hhhZh`hhhhhRXhhYh
+@HWI-EAS91_1_30788AAXX:7:49:654:1439
+CTAACTCTATTTATTGTATTTCAACTAAAAATCTCATAGGTTTATTGATAGTTGTGTTGTTGGTGTAAATGG
++HWI-EAS91_1_30788AAXX:7:49:654:1439
+hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhdhh_hG\XhU@
+@HWI-EAS91_1_30788AAXX:7:64:947:234
+TATCAAAAAAGAATATAATCTGAATCAACACTACAACCTATTAGTGTGTAGAATAGGAAGTAGAGGCCTGCG
++HWI-EAS91_1_30788AAXX:7:64:947:234
+hhhhhhhhhhhhhhhhhhhhhhhRhhehhahhhhhJhhhhhhhh^hPhWfhhhhThWUhhfhh_hhNIVPUd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split_pair_reads_1.fastqsanger	Wed Aug 21 13:20:57 2013 -0400
@@ -0,0 +1,20 @@
+@HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
+GTCAATTGTACTGGTCAATACTAAAAGAATAGGATC
++HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
+hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+@HWI-EAS91_1_30788AAXX:7:22:1621:462/1
+ATAATGGCTATTATTGTGGGGGGGATGATGCTGGAA
++HWI-EAS91_1_30788AAXX:7:22:1621:462/1
+hhhhhhhhhhhhQAhh@hhhhNhhhfhMbCIScC?h
+@HWI-EAS91_1_30788AAXX:7:45:408:807/1
+TACCCGATTTTTTGCTTTCCACTTTATCCTACCCTT
++HWI-EAS91_1_30788AAXX:7:45:408:807/1
+hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+@HWI-EAS91_1_30788AAXX:7:49:654:1439/1
+CTAACTCTATTTATTGTATTTCAACTAAAAATCTCA
++HWI-EAS91_1_30788AAXX:7:49:654:1439/1
+hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+@HWI-EAS91_1_30788AAXX:7:64:947:234/1
+TATCAAAAAAGAATATAATCTGAATCAACACTACAA
++HWI-EAS91_1_30788AAXX:7:64:947:234/1
+hhhhhhhhhhhhhhhhhhhhhhhRhhehhahhhhhJ
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split_pair_reads_2.fastqsanger	Wed Aug 21 13:20:57 2013 -0400
@@ -0,0 +1,20 @@
+@HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
+GCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
++HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
+hhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR
+@HWI-EAS91_1_30788AAXX:7:22:1621:462/2
+ACTAGCCCCAATATCAATCCTATATCAAATCTCACC
++HWI-EAS91_1_30788AAXX:7:22:1621:462/2
+hJhhhhChhhJhhhRhhKhePhc\KhhV\KhXhJhh
+@HWI-EAS91_1_30788AAXX:7:45:408:807/2
+ATGAGTGCTAGGATCAGGATGGAGAGGATTAGGGCT
++HWI-EAS91_1_30788AAXX:7:45:408:807/2
+hhhhhhhhhhhhhhhhhh`hhhZh`hhhhhRXhhYh
+@HWI-EAS91_1_30788AAXX:7:49:654:1439/2
+TAGGTTTATTGATAGTTGTGTTGTTGGTGTAAATGG
++HWI-EAS91_1_30788AAXX:7:49:654:1439/2
+hhhhhhhhhhhhhhhhhhhhhhhhhdhh_hG\XhU@
+@HWI-EAS91_1_30788AAXX:7:64:947:234/2
+CCTATTAGTGTGTAGAATAGGAAGTAGAGGCCTGCG
++HWI-EAS91_1_30788AAXX:7:64:947:234/2
+hhhhhhhh^hPhWfhhhhThWUhhfhh_hhNIVPUd