annotate get_pairs.py @ 0:d790774a4e95 draft default tip

planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
author genouest
date Mon, 10 Sep 2018 10:16:23 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
1 #!/opt/python/bin/python
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
2 # -*- coding: utf-8 -*-
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
3 # ----------------------------------------------------------
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
4 # --
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
5 # -- author : Pierre Pericard
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
6 # -- created : 2012-11-09
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
7 # -- modified: 2013-05-23
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
8 # --
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
9 # ----------------------------------------------------------
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
10 # --
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
11 # -- description : Get separately paired reads and singletons
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
12 # -- from two fastq files (left and right)
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
13 # --
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
14 # -- get_pairs.py file1.fastq file2.fastq
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
15 # --
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
16 # ----------------------------------------------------------
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
17
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
18 import argparse
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
19 import sys
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
20
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
21
d790774a4e95 planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/get_pairs commit 02689a4f03aff06ce7f56ffd329ad72befdc692c
genouest
parents:
diff changeset
def _read_name(header):
    """Return the mate-independent read name of a FASTQ header line.

    The first whitespace-delimited token is taken, its first character
    (the '@' of a FASTQ header) is dropped, and everything from the first
    '/' onwards is removed, e.g. '@read7/1 extra' -> 'read7'.
    """
    return header.split()[0][1:].split('/')[0]


def _collect_names(path):
    """Return the set of read names found in the FASTQ file at *path*.

    Lines are stripped and blank lines are ignored before counting, so
    every 4th non-blank line (starting with the first) is treated as a
    header.  Progress is reported on stdout.

    NOTE(review): because blank lines are skipped, a record whose sequence
    and quality lines are empty would shift the 4-line framing -- confirm
    that zero-length reads cannot occur in the inputs.
    """
    names = set()
    count = 0
    with open(path, 'r') as handle:
        for line in handle:
            line = line.strip()
            if not line:
                continue
            count += 1
            if count % 4 == 1:
                names.add(_read_name(line))
                # Refresh the progress counter once every 10,000 reads.
                if count % 40000 == 1:
                    sys.stdout.write("\r%.2f M reads read" % (count / 4000000.0))
    sys.stdout.write("\r%.2f M reads read\n" % (count / 4000000.0))
    return names


def _split_reads(path, basefilename, notcommon):
    """Split the FASTQ file at *path* into paired and unpaired outputs.

    Records whose read name is in *notcommon* go to
    '<basefilename>.unpaired.fastq', all others go to
    '<basefilename>.paired.fastq' (both created in the current working
    directory).  Lines are written stripped, blank lines are dropped.
    """
    # Context managers guarantee both outputs are flushed and closed even
    # if reading or writing fails part-way through.
    with open(basefilename + '.paired.fastq', 'w') as paired_out, \
            open(basefilename + '.unpaired.fastq', 'w') as unpaired_out, \
            open(path, 'r') as handle:
        count = 0
        paired = False
        for line in handle:
            line = line.strip()
            if not line:
                continue
            count += 1
            if count % 4 == 1:
                # The header decides the destination of the whole record;
                # the following three lines reuse the same flag.
                paired = _read_name(line) not in notcommon
                if count % 40000 == 1:
                    sys.stdout.write("\r%.2f M reads writen" % (count / 4000000.0))
            if paired:
                paired_out.write("%s\n" % line)
            else:
                unpaired_out.write("%s\n" % line)
        sys.stdout.write("\r%.2f M reads writen\n" % (count / 4000000.0))


def main(argv=None):
    """Separate paired reads and singletons from two FASTQ files.

    *argv* is the list of command-line arguments (defaults to
    ``sys.argv[1:]``): the left reads file followed by the right reads file.
    Writes left.paired.fastq, left.unpaired.fastq, right.paired.fastq and
    right.unpaired.fastq into the current working directory.
    """
    # Arguments
    parser = argparse.ArgumentParser(description='Get separately paired reads and singletons from two fastq files (left and right)')
    parser.add_argument('leftreads', metavar='leftreads', type=argparse.FileType('r'), help='left reads fastq')
    parser.add_argument('rightreads', metavar='rightreads', type=argparse.FileType('r'), help='right reads fastq')

    args = parser.parse_args(argv)

    leftreads = args.leftreads.name
    rightreads = args.rightreads.name
    # FileType is only used to validate readability at parse time; the
    # files are re-opened by name below, so release these handles now.
    args.leftreads.close()
    args.rightreads.close()

    # Names present in exactly one of the two files are the singletons.
    notcommon = _collect_names(leftreads) ^ _collect_names(rightreads)

    # Pair each input with its output prefix explicitly, so the right-hand
    # outputs are still produced when both arguments name the same file.
    for path, basefilename in ((leftreads, "left"), (rightreads, "right")):
        _split_reads(path, basefilename, notcommon)


if __name__ == '__main__':
    main()