Mercurial > repos > nick > duplex
comparison utils/subsample.py @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author | nick |
---|---|
date | Thu, 02 Feb 2017 18:44:31 -0500 |
parents | af383638de66 |
children |
comparison
equal
deleted
inserted
replaced
17:836fa4fe9494 | 18:e4d75f9efb90 |
---|---|
1 #!/usr/bin/env python | |
2 from __future__ import division | |
3 import sys | |
4 import random | |
5 import argparse | |
6 | |
# Default values for the command-line options (applied via set_defaults()).
OPT_DEFAULTS = {'fraction': 0.1, 'seed': 1}
USAGE = "%(prog)s [options]"
DESCRIPTION = """"""


def _emit_family(family, fraction, outfile):
    """Write the lines in `family` to `outfile` with probability `fraction`.

    A random number is drawn on every call, even for an empty family, so
    that the sequence of draws (and therefore which families are selected
    for a given seed) does not depend on whether a family has any lines.
    """
    # random.random() is in [0.0, 1.0), so a strict "<" gives exactly
    # "never" for fraction 0 and "always" for fraction 1.
    if random.random() < fraction:
        outfile.write(''.join(family))


def _subsample(lines, fraction, outfile):
    """Group consecutive lines by barcode and emit a random subset of groups.

    `lines` is any iterable of text lines. The barcode is the first
    tab-delimited field; consecutive lines with the same barcode form one
    family. Each family is written to `outfile` intact, or not at all.
    """
    family = []
    last_barcode = None
    for line in lines:
        stripped = line.rstrip('\r\n')
        if not stripped:
            # Skip blank lines instead of treating them as a family whose
            # barcode is the empty string.
            continue
        barcode = stripped.split('\t')[0]
        if barcode != last_barcode:
            # A new family is starting: decide the fate of the previous one.
            # On the very first line the previous family is empty; the draw
            # is still made to keep seeded results stable.
            _emit_family(family, fraction, outfile)
            family = []
        family.append(line)
        last_barcode = barcode
    # The final family has no following barcode change to trigger its
    # output, so it must be handled explicitly once the input is exhausted.
    if family:
        _emit_family(family, fraction, outfile)


def main(argv):
    """Randomly subsample read families from a tab-delimited file.

    Reads from the file named on the command line, or from stdin when no
    file is given, and writes the selected families to stdout.

    Args:
        argv: The full argument list, with the program name at index 0.

    Returns:
        None, so that `sys.exit(main(sys.argv))` exits with status 0.
    """
    parser = argparse.ArgumentParser(description=DESCRIPTION)
    parser.set_defaults(**OPT_DEFAULTS)

    parser.add_argument('infile', metavar='read-families.tsv', nargs='?',
        help='The input reads, sorted into families.')
    parser.add_argument('-f', '--fraction', type=float,
        help='Fraction of families to output. Default: %(default)s')
    parser.add_argument('-s', '--seed', type=int,
        help='Random number generator seed. Default: %(default)s')

    args = parser.parse_args(argv[1:])

    # Seed the generator so a given seed always selects the same families.
    random.seed(args.seed)

    if args.infile:
        infile = open(args.infile)
    else:
        infile = sys.stdin

    try:
        _subsample(infile, args.fraction, sys.stdout)
    finally:
        # Close the file even if reading or writing fails, but never close
        # a stdin that this function did not open.
        if infile is not sys.stdin:
            infile.close()
48 | |
49 | |
def fail(message):
    """Report an error and abort the program.

    Writes `message`, followed by a newline, to stderr and then exits
    with status 1 by raising SystemExit.
    """
    text = message + "\n"
    sys.stderr.write(text)
    raise SystemExit(1)
53 | |
# Run main() only when executed as a script, and use its return value as
# the process exit status.
if __name__ == '__main__':
    exit_status = main(sys.argv)
    sys.exit(exit_status)