Mercurial > repos > devteam > fastq_paired_end_interlacer
comparison fastq_paired_end_interlacer.py @ 0:cfc3ad769dba draft
Imported from capsule None
author | devteam |
---|---|
date | Thu, 23 Jan 2014 12:31:16 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:cfc3ad769dba |
---|---|
1 #Florent Angly | |
2 import sys | |
3 from galaxy_utils.sequence.fastq import fastqReader, fastqWriter, fastqNamedReader, fastqJoiner | |
4 | |
def main():
    """Interlace two paired-end FASTQ files into a pairs file and a singles file.

    Command-line arguments (read positionally from ``sys.argv``):
        1. mate1 FASTQ filename
        2. mate1 FASTQ format (an empty value falls back to 'sanger')
        3. mate2 FASTQ filename
        4. mate2 FASTQ format (an empty value falls back to 'sanger')
        5. output filename for interlaced pairs
        6. output filename for reads that have no mate

    If the two formats differ, a warning is printed and nothing is written.
    Otherwise every mate1 read is looked up in the mate2 file using the
    identifier returned by ``joiner.get_paired_identifier``; matched reads are
    written to the pairs file (mate1 first, then mate2) and unmatched reads to
    the singles file. A second pass over mate2 appends the mate2 reads that
    have no counterpart in mate1 to the singles file. A short summary is
    printed at the end.

    All opened readers, writers and file handles are closed even when an
    error interrupts processing.
    """
    mate1_filename = sys.argv[1]
    mate1_type = sys.argv[2] or 'sanger'
    mate2_filename = sys.argv[3]
    mate2_type = sys.argv[4] or 'sanger'
    outfile_pairs = sys.argv[5]
    outfile_singles = sys.argv[6]

    if mate1_type != mate2_type:
        print("WARNING: You are trying to interlace files of two different types: %s and %s." % (mate1_type, mate2_type))
        return

    # Named 'fastq_format' rather than 'type' to avoid shadowing the builtin.
    fastq_format = mate1_type
    joiner = fastqJoiner(fastq_format)

    nof_singles = 0
    nof_pairs = 0
    saw_read = False  # becomes True once either input yields a read

    # Everything that needs closing is registered here as soon as it is
    # opened, so the 'finally' clause can release it whatever happens.
    opened = []
    try:
        out_pairs = fastqWriter(open(outfile_pairs, 'wb'), format=fastq_format)
        opened.append(out_pairs)
        out_singles = fastqWriter(open(outfile_singles, 'wb'), format=fastq_format)
        opened.append(out_singles)

        # Pairs + singles present in mate1
        mate2_input = fastqNamedReader(open(mate2_filename, 'rb'), format=fastq_format)
        opened.append(mate2_input)
        mate1_handle = open(mate1_filename, 'rb')
        opened.append(mate1_handle)
        for mate1 in fastqReader(mate1_handle, format=fastq_format):
            saw_read = True
            mate2 = mate2_input.get(joiner.get_paired_identifier(mate1))
            if mate2:
                out_pairs.write(mate1)
                out_pairs.write(mate2)
                nof_pairs += 1
            else:
                out_singles.write(mate1)
                nof_singles += 1

        # Singles present in mate2
        mate1_input = fastqNamedReader(open(mate1_filename, 'rb'), format=fastq_format)
        opened.append(mate1_input)
        mate2_handle = open(mate2_filename, 'rb')
        opened.append(mate2_handle)
        for mate2 in fastqReader(mate2_handle, format=fastq_format):
            saw_read = True
            mate1 = mate1_input.get(joiner.get_paired_identifier(mate2))
            if not mate1:
                out_singles.write(mate2)
                nof_singles += 1

        if not saw_read:
            print("Your input files contained no valid FASTQ sequences.")
        else:
            print('There were %s single reads.' % nof_singles)
            print('Interlaced %s pairs of sequences.' % nof_pairs)
    finally:
        # Close in reverse order of opening.
        for resource in reversed(opened):
            resource.close()
56 | |
57 | |
# Run the interlacer only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()