Mercurial > repos > nick > duplex
annotate utils/sim-genome.py @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author | nick |
---|---|
date | Thu, 02 Feb 2017 18:44:31 -0500 |
parents | |
children |
rev | line source |
---|---|
18
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
1 #!/usr/bin/env python |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
2 from __future__ import division |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
3 from __future__ import print_function |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
4 import sys |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
5 import random |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
6 import argparse |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
7 import numpy |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
8 import sim |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
9 import fastareader |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
10 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
11 ARG_DEFAULTS = {'spacing':600, 'indel_rate':0.15, 'ext_rate':0.3} |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
12 USAGE = "%(prog)s [options]" |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
13 DESCRIPTION = """Generate a version of the input reference with randomly added variants.""" |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
14 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
15 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
16 def main(argv): |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
17 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
18 parser = argparse.ArgumentParser(description=DESCRIPTION) |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
19 parser.set_defaults(**ARG_DEFAULTS) |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
20 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
21 parser.add_argument('ref', type=fastareader.FastaLineGenerator, |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
22 help='The original reference genome (FASTA).') |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
23 parser.add_argument('-m', '--mutations', type=argparse.FileType('w'), |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
24 help='Write inserted mutations here.') |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
25 parser.add_argument('-s', '--spacing', type=int, |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
26 help='Average (or exact) spacing between variants, in bp. Note: must be less than the line ' |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
27 'width in the reference file. Default: %(default)s') |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
28 parser.add_argument('-r', '--random', action='store_true', |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
29 help='Randomly distribute the variants instead of evenly spacing them.') |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
30 parser.add_argument('-N', '--ref-name', |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
31 help='Name the output sequence this.') |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
32 parser.add_argument('-i', '--indel-rate', type=float, |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
33 help='Default: %(default)s') |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
34 parser.add_argument('-E', '--extension-rate', dest='ext_rate', type=float, |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
35 help='Default: %(default)s') |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
36 parser.add_argument('-S', '--seed', type=int, |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
37 help='Default: random.') |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
38 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
39 args = parser.parse_args(argv[1:]) |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
40 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
41 if args.seed is None: |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
42 seed = random.randint(0, 2**31-1) |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
43 sys.stderr.write('seed: {}\n'.format(seed)) |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
44 else: |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
45 seed = args.seed |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
46 random.seed(seed) |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
47 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
48 var_prob = 1/args.spacing |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
49 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
50 next_var = args.spacing |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
51 coord = 0 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
52 chr_name = None |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
53 for line in args.ref: |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
54 if args.ref.name != chr_name: |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
55 chr_name = args.ref.name |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
56 if args.ref_name: |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
57 chr_id = args.ref_name.split()[0] |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
58 print('>'+args.ref_name) |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
59 else: |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
60 chr_id = args.ref.id |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
61 print('>'+chr_name) |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
62 end_coord = coord + len(line) |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
63 new_line = line |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
64 if args.random: |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
65 n_vars = numpy.random.binomial(len(line), var_prob) |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
66 for i in range(n_vars): |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
67 vcoord = random.randint(coord+1, end_coord) |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
68 vtype, alt = sim.make_mutation(args.indel_rate, args.ext_rate) |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
69 var = {'coord':vcoord-coord-1, 'type':vtype, 'alt':alt} |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
70 # sys.stderr.write('Adding var at {}: {} ({})\n'.format(vcoord, vtype, alt)) |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
71 new_line = sim.apply_mutation(var, new_line) |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
72 if args.mutations: |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
73 var['coord'] = vcoord |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
74 sim.log_mutations(args.mutations, [var], '.', chr_id, '.', '.') |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
75 else: |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
76 if next_var <= end_coord: |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
77 vtype, alt = sim.make_mutation(args.indel_rate, args.ext_rate) |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
78 var = {'coord':next_var-coord-1, 'type':vtype, 'alt':alt} |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
79 # sys.stderr.write('Adding var at {}: {} ({})\n'.format(next_var, vtype, alt)) |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
80 new_line = sim.apply_mutation(var, new_line) |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
81 if args.mutations: |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
82 var['coord'] = next_var |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
83 sim.log_mutations(args.mutations, [var], '.', chr_id, '.', '.') |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
84 next_var += args.spacing |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
85 print(new_line) |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
86 coord += len(line) |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
87 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
88 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
89 def fail(message): |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
90 sys.stderr.write(message+"\n") |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
91 sys.exit(1) |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
92 |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
93 if __name__ == '__main__': |
e4d75f9efb90
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
nick
parents:
diff
changeset
|
94 sys.exit(main(sys.argv)) |