Mercurial > repos > nick > duplex
view utils/sim-genome.py @ 18:e4d75f9efb90 draft
planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author | nick |
---|---|
date | Thu, 02 Feb 2017 18:44:31 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python from __future__ import division from __future__ import print_function import sys import random import argparse import numpy import sim import fastareader ARG_DEFAULTS = {'spacing':600, 'indel_rate':0.15, 'ext_rate':0.3} USAGE = "%(prog)s [options]" DESCRIPTION = """Generate a version of the input reference with randomly added variants.""" def main(argv): parser = argparse.ArgumentParser(description=DESCRIPTION) parser.set_defaults(**ARG_DEFAULTS) parser.add_argument('ref', type=fastareader.FastaLineGenerator, help='The original reference genome (FASTA).') parser.add_argument('-m', '--mutations', type=argparse.FileType('w'), help='Write inserted mutations here.') parser.add_argument('-s', '--spacing', type=int, help='Average (or exact) spacing between variants, in bp. Note: must be less than the line ' 'width in the reference file. Default: %(default)s') parser.add_argument('-r', '--random', action='store_true', help='Randomly distribute the variants instead of evenly spacing them.') parser.add_argument('-N', '--ref-name', help='Name the output sequence this.') parser.add_argument('-i', '--indel-rate', type=float, help='Default: %(default)s') parser.add_argument('-E', '--extension-rate', dest='ext_rate', type=float, help='Default: %(default)s') parser.add_argument('-S', '--seed', type=int, help='Default: random.') args = parser.parse_args(argv[1:]) if args.seed is None: seed = random.randint(0, 2**31-1) sys.stderr.write('seed: {}\n'.format(seed)) else: seed = args.seed random.seed(seed) var_prob = 1/args.spacing next_var = args.spacing coord = 0 chr_name = None for line in args.ref: if args.ref.name != chr_name: chr_name = args.ref.name if args.ref_name: chr_id = args.ref_name.split()[0] print('>'+args.ref_name) else: chr_id = args.ref.id print('>'+chr_name) end_coord = coord + len(line) new_line = line if args.random: n_vars = numpy.random.binomial(len(line), var_prob) for i in range(n_vars): vcoord = random.randint(coord+1, end_coord) vtype, alt = sim.make_mutation(args.indel_rate, args.ext_rate) var = {'coord':vcoord-coord-1, 'type':vtype, 'alt':alt} # sys.stderr.write('Adding var at {}: {} ({})\n'.format(vcoord, vtype, alt)) new_line = sim.apply_mutation(var, new_line) if args.mutations: var['coord'] = vcoord sim.log_mutations(args.mutations, [var], '.', chr_id, '.', '.') else: if next_var <= end_coord: vtype, alt = sim.make_mutation(args.indel_rate, args.ext_rate) var = {'coord':next_var-coord-1, 'type':vtype, 'alt':alt} # sys.stderr.write('Adding var at {}: {} ({})\n'.format(next_var, vtype, alt)) new_line = sim.apply_mutation(var, new_line) if args.mutations: var['coord'] = next_var sim.log_mutations(args.mutations, [var], '.', chr_id, '.', '.') next_var += args.spacing print(new_line) coord += len(line) def fail(message): sys.stderr.write(message+"\n") sys.exit(1) if __name__ == '__main__': sys.exit(main(sys.argv))