comparison utils/subsample.py @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents af383638de66
children
comparison
equal deleted inserted replaced
17:836fa4fe9494 18:e4d75f9efb90
1 #!/usr/bin/env python
2 from __future__ import division
3 import sys
4 import random
5 import argparse
6
# Fallback values for the command-line options; main() installs them with
# parser.set_defaults().
OPT_DEFAULTS = dict(fraction=0.1, seed=1)
# argparse-style usage template (not referenced by the code visible here).
USAGE = '%(prog)s [options]'
# Description passed to the argument parser; currently empty.
DESCRIPTION = ''
10
def main(argv):
    """Write a random subsample of read families to stdout.

    Reads tab-delimited lines from the file named on the command line, or
    from stdin when no file is given. Consecutive lines sharing the same
    first field (the barcode) form one family. Each family is written out
    whole, unmodified, when a random draw is <= ``--fraction``.

    Blank lines are skipped and never written. The last family in the input
    is considered like every other family.

    Args:
        argv: Full argument vector with the program name first
            (e.g. ``sys.argv``); only ``argv[1:]`` is parsed.

    Returns:
        None.
    """
    parser = argparse.ArgumentParser(description=DESCRIPTION)
    parser.set_defaults(**OPT_DEFAULTS)

    parser.add_argument('infile', metavar='read-families.tsv', nargs='?',
        help='The input reads, sorted into families.')
    parser.add_argument('-f', '--fraction', type=float,
        help='Fraction of families to output. Default: %(default)s')
    parser.add_argument('-s', '--seed', type=int,
        help='Random number generator seed. Default: %(default)s')

    args = parser.parse_args(argv[1:])

    # Seed the generator so the same seed and input give the same subsample.
    random.seed(args.seed)

    infile = open(args.infile) if args.infile else sys.stdin
    try:
        family = []
        last_barcode = None
        for line in infile:
            content = line.rstrip('\r\n')
            if not content:
                # str.split() never returns an empty list, so blank lines
                # must be detected before splitting.
                continue
            barcode = content.split('\t')[0]
            if barcode != last_barcode:
                # A new barcode ends the previous family: decide its fate now.
                # On the very first line `family` is still empty, so this draw
                # writes nothing; it is kept so that a given seed selects the
                # same families as earlier versions of this script.
                if random.random() <= args.fraction:
                    sys.stdout.write(''.join(family))
                family = []
            family.append(line)
            last_barcode = barcode
        # No barcode change follows the final family, so flush it here.
        if family and random.random() <= args.fraction:
            sys.stdout.write(''.join(family))
    finally:
        # Close the input even if reading or writing raised; never close stdin.
        if infile is not sys.stdin:
            infile.close()
48
49
def fail(message):
    """Write `message` plus a newline to stderr, then exit with status 1."""
    text = message + "\n"
    sys.stderr.write(text)
    raise SystemExit(1)
53
# Script entry point: pass the full argument vector to main() and exit with
# whatever status it returns.
if __name__ == '__main__':
    status = main(sys.argv)
    sys.exit(status)