Mercurial > repos > nick > duplex
comparison utils/subsample.py @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
| author | nick |
|---|---|
| date | Thu, 02 Feb 2017 18:44:31 -0500 |
| parents | af383638de66 |
| children |
comparison
equal
deleted
inserted
replaced
| 17:836fa4fe9494 | 18:e4d75f9efb90 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 from __future__ import division | |
| 3 import sys | |
| 4 import random | |
| 5 import argparse | |
| 6 | |
# Values used for command-line options that the user does not supply.
OPT_DEFAULTS = dict(fraction=0.1, seed=1)
# argparse-style usage template (%(prog)s is a placeholder for the program name).
USAGE = '%(prog)s [options]'
# Text handed to argparse as the program description (currently empty).
DESCRIPTION = ''
| 10 | |
def main(argv):
    """Write a random subsample of read families to stdout.

    The input is read line by line from the file named on the command line,
    or from stdin when no file is given. The text before the first tab on
    each line is treated as the barcode, and a run of consecutive lines with
    the same barcode forms one family. Each family is written out, unchanged,
    with probability ``--fraction``; the random number generator is seeded
    with ``--seed``.

    Args:
        argv: The full argument vector. Element 0 (the program name) is
            skipped; the rest is parsed by argparse.

    Returns:
        None, so ``sys.exit(main(argv))`` exits with status 0.
    """
    parser = argparse.ArgumentParser(description=DESCRIPTION)
    parser.set_defaults(**OPT_DEFAULTS)

    parser.add_argument('infile', metavar='read-families.tsv', nargs='?',
        help='The input reads, sorted into families.')
    parser.add_argument('-f', '--fraction', type=float,
        help='Fraction of families to output. Default: %(default)s')
    parser.add_argument('-s', '--seed', type=int,
        help='Random number generator seed. Default: %(default)s')

    args = parser.parse_args(argv[1:])

    random.seed(args.seed)

    if args.infile:
        infile = open(args.infile)
    else:
        infile = sys.stdin

    try:
        for family in _sample_families(infile, args.fraction):
            sys.stdout.write(''.join(family))
    finally:
        # Close the file even if reading or writing fails; never close stdin.
        if infile is not sys.stdin:
            infile.close()


def _sample_families(lines, fraction):
    """Yield each randomly selected family as a list of its raw lines.

    Args:
        lines: An iterable of input lines (line endings included).
        fraction: Probability, between 0 and 1, of selecting each family.

    Yields:
        Lists of consecutive lines sharing the same first tab-delimited field.
    """
    family = []
    last_barcode = None
    for line in lines:
        # The barcode is everything before the first tab. A blank line has an
        # empty barcode and therefore forms a family of its own.
        barcode = line.rstrip('\r\n').split('\t', 1)[0]
        if barcode != last_barcode:
            # One draw is made at every barcode boundary, including the one
            # before the first family (when nothing has been collected yet).
            # Keeping that draw means a given seed selects the same families
            # as it did before the last-family fix below.
            # random.random() is in [0.0, 1.0), so "<" selects with
            # probability `fraction` and never selects when fraction is 0.
            selected = random.random() < fraction
            if selected and family:
                yield family
            family = []
            last_barcode = barcode
        family.append(line)
    # The final family has no following boundary to trigger its draw, so it
    # must be handled here; otherwise it could never be output.
    if family and random.random() < fraction:
        yield family
| 48 | |
| 49 | |
def fail(message):
    """Print an error message to stderr and terminate with exit status 1.

    Args:
        message: The text to report; a newline is appended before writing.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    terminated = message + "\n"
    sys.stderr.write(terminated)
    raise SystemExit(1)
| 53 | |
# Run as a script: hand the raw argument vector to main() and use its
# return value as the process exit status.
if __name__ == '__main__':
    exit_status = main(sys.argv)
    sys.exit(exit_status)
