comparison utils/sim-genome.py @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
comparison
equal deleted inserted replaced
17:836fa4fe9494 18:e4d75f9efb90
1 #!/usr/bin/env python
2 from __future__ import division
3 from __future__ import print_function
4 import sys
5 import random
6 import argparse
7 import numpy
8 import sim
9 import fastareader
10
11 ARG_DEFAULTS = {'spacing':600, 'indel_rate':0.15, 'ext_rate':0.3}
12 USAGE = "%(prog)s [options]"
13 DESCRIPTION = """Generate a version of the input reference with randomly added variants."""
14
15
16 def main(argv):
17
18 parser = argparse.ArgumentParser(description=DESCRIPTION)
19 parser.set_defaults(**ARG_DEFAULTS)
20
21 parser.add_argument('ref', type=fastareader.FastaLineGenerator,
22 help='The original reference genome (FASTA).')
23 parser.add_argument('-m', '--mutations', type=argparse.FileType('w'),
24 help='Write inserted mutations here.')
25 parser.add_argument('-s', '--spacing', type=int,
26 help='Average (or exact) spacing between variants, in bp. Note: must be less than the line '
27 'width in the reference file. Default: %(default)s')
28 parser.add_argument('-r', '--random', action='store_true',
29 help='Randomly distribute the variants instead of evenly spacing them.')
30 parser.add_argument('-N', '--ref-name',
31 help='Name the output sequence this.')
32 parser.add_argument('-i', '--indel-rate', type=float,
33 help='Default: %(default)s')
34 parser.add_argument('-E', '--extension-rate', dest='ext_rate', type=float,
35 help='Default: %(default)s')
36 parser.add_argument('-S', '--seed', type=int,
37 help='Default: random.')
38
39 args = parser.parse_args(argv[1:])
40
41 if args.seed is None:
42 seed = random.randint(0, 2**31-1)
43 sys.stderr.write('seed: {}\n'.format(seed))
44 else:
45 seed = args.seed
46 random.seed(seed)
47
48 var_prob = 1/args.spacing
49
50 next_var = args.spacing
51 coord = 0
52 chr_name = None
53 for line in args.ref:
54 if args.ref.name != chr_name:
55 chr_name = args.ref.name
56 if args.ref_name:
57 chr_id = args.ref_name.split()[0]
58 print('>'+args.ref_name)
59 else:
60 chr_id = args.ref.id
61 print('>'+chr_name)
62 end_coord = coord + len(line)
63 new_line = line
64 if args.random:
65 n_vars = numpy.random.binomial(len(line), var_prob)
66 for i in range(n_vars):
67 vcoord = random.randint(coord+1, end_coord)
68 vtype, alt = sim.make_mutation(args.indel_rate, args.ext_rate)
69 var = {'coord':vcoord-coord-1, 'type':vtype, 'alt':alt}
70 # sys.stderr.write('Adding var at {}: {} ({})\n'.format(vcoord, vtype, alt))
71 new_line = sim.apply_mutation(var, new_line)
72 if args.mutations:
73 var['coord'] = vcoord
74 sim.log_mutations(args.mutations, [var], '.', chr_id, '.', '.')
75 else:
76 if next_var <= end_coord:
77 vtype, alt = sim.make_mutation(args.indel_rate, args.ext_rate)
78 var = {'coord':next_var-coord-1, 'type':vtype, 'alt':alt}
79 # sys.stderr.write('Adding var at {}: {} ({})\n'.format(next_var, vtype, alt))
80 new_line = sim.apply_mutation(var, new_line)
81 if args.mutations:
82 var['coord'] = next_var
83 sim.log_mutations(args.mutations, [var], '.', chr_id, '.', '.')
84 next_var += args.spacing
85 print(new_line)
86 coord += len(line)
87
88
89 def fail(message):
90 sys.stderr.write(message+"\n")
91 sys.exit(1)
92
93 if __name__ == '__main__':
94 sys.exit(main(sys.argv))