Mercurial > repos > nick > duplex
comparison utils/sim-genome.py @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author | nick |
---|---|
date | Thu, 02 Feb 2017 18:44:31 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:836fa4fe9494 | 18:e4d75f9efb90 |
---|---|
1 #!/usr/bin/env python | |
2 from __future__ import division | |
3 from __future__ import print_function | |
4 import sys | |
5 import random | |
6 import argparse | |
7 import numpy | |
8 import sim | |
9 import fastareader | |
10 | |
11 ARG_DEFAULTS = {'spacing':600, 'indel_rate':0.15, 'ext_rate':0.3} | |
12 USAGE = "%(prog)s [options]" | |
13 DESCRIPTION = """Generate a version of the input reference with randomly added variants.""" | |
14 | |
15 | |
16 def main(argv): | |
17 | |
18 parser = argparse.ArgumentParser(description=DESCRIPTION) | |
19 parser.set_defaults(**ARG_DEFAULTS) | |
20 | |
21 parser.add_argument('ref', type=fastareader.FastaLineGenerator, | |
22 help='The original reference genome (FASTA).') | |
23 parser.add_argument('-m', '--mutations', type=argparse.FileType('w'), | |
24 help='Write inserted mutations here.') | |
25 parser.add_argument('-s', '--spacing', type=int, | |
26 help='Average (or exact) spacing between variants, in bp. Note: must be less than the line ' | |
27 'width in the reference file. Default: %(default)s') | |
28 parser.add_argument('-r', '--random', action='store_true', | |
29 help='Randomly distribute the variants instead of evenly spacing them.') | |
30 parser.add_argument('-N', '--ref-name', | |
31 help='Name the output sequence this.') | |
32 parser.add_argument('-i', '--indel-rate', type=float, | |
33 help='Default: %(default)s') | |
34 parser.add_argument('-E', '--extension-rate', dest='ext_rate', type=float, | |
35 help='Default: %(default)s') | |
36 parser.add_argument('-S', '--seed', type=int, | |
37 help='Default: random.') | |
38 | |
39 args = parser.parse_args(argv[1:]) | |
40 | |
41 if args.seed is None: | |
42 seed = random.randint(0, 2**31-1) | |
43 sys.stderr.write('seed: {}\n'.format(seed)) | |
44 else: | |
45 seed = args.seed | |
46 random.seed(seed) | |
47 | |
48 var_prob = 1/args.spacing | |
49 | |
50 next_var = args.spacing | |
51 coord = 0 | |
52 chr_name = None | |
53 for line in args.ref: | |
54 if args.ref.name != chr_name: | |
55 chr_name = args.ref.name | |
56 if args.ref_name: | |
57 chr_id = args.ref_name.split()[0] | |
58 print('>'+args.ref_name) | |
59 else: | |
60 chr_id = args.ref.id | |
61 print('>'+chr_name) | |
62 end_coord = coord + len(line) | |
63 new_line = line | |
64 if args.random: | |
65 n_vars = numpy.random.binomial(len(line), var_prob) | |
66 for i in range(n_vars): | |
67 vcoord = random.randint(coord+1, end_coord) | |
68 vtype, alt = sim.make_mutation(args.indel_rate, args.ext_rate) | |
69 var = {'coord':vcoord-coord-1, 'type':vtype, 'alt':alt} | |
70 # sys.stderr.write('Adding var at {}: {} ({})\n'.format(vcoord, vtype, alt)) | |
71 new_line = sim.apply_mutation(var, new_line) | |
72 if args.mutations: | |
73 var['coord'] = vcoord | |
74 sim.log_mutations(args.mutations, [var], '.', chr_id, '.', '.') | |
75 else: | |
76 if next_var <= end_coord: | |
77 vtype, alt = sim.make_mutation(args.indel_rate, args.ext_rate) | |
78 var = {'coord':next_var-coord-1, 'type':vtype, 'alt':alt} | |
79 # sys.stderr.write('Adding var at {}: {} ({})\n'.format(next_var, vtype, alt)) | |
80 new_line = sim.apply_mutation(var, new_line) | |
81 if args.mutations: | |
82 var['coord'] = next_var | |
83 sim.log_mutations(args.mutations, [var], '.', chr_id, '.', '.') | |
84 next_var += args.spacing | |
85 print(new_line) | |
86 coord += len(line) | |
87 | |
88 | |
89 def fail(message): | |
90 sys.stderr.write(message+"\n") | |
91 sys.exit(1) | |
92 | |
93 if __name__ == '__main__': | |
94 sys.exit(main(sys.argv)) |