view utils/sim-genome.py @ 18:e4d75f9efb90 draft

planemo upload commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty
author nick
date Thu, 02 Feb 2017 18:44:31 -0500
parents
children
line wrap: on
line source

#!/usr/bin/env python
from __future__ import division
from __future__ import print_function
import sys
import random
import argparse
import numpy
import sim
import fastareader

ARG_DEFAULTS = {'spacing':600, 'indel_rate':0.15, 'ext_rate':0.3}
USAGE = "%(prog)s [options]"
DESCRIPTION = """Generate a version of the input reference with randomly added variants."""


def main(argv):

  parser = argparse.ArgumentParser(description=DESCRIPTION)
  parser.set_defaults(**ARG_DEFAULTS)

  parser.add_argument('ref', type=fastareader.FastaLineGenerator,
    help='The original reference genome (FASTA).')
  parser.add_argument('-m', '--mutations', type=argparse.FileType('w'),
    help='Write inserted mutations here.')
  parser.add_argument('-s', '--spacing', type=int,
    help='Average (or exact) spacing between variants, in bp. Note: must be less than the line '
         'width in the reference file. Default: %(default)s')
  parser.add_argument('-r', '--random', action='store_true',
    help='Randomly distribute the variants instead of evenly spacing them.')
  parser.add_argument('-N', '--ref-name',
    help='Name the output sequence this.')
  parser.add_argument('-i', '--indel-rate', type=float,
    help='Default: %(default)s')
  parser.add_argument('-E', '--extension-rate', dest='ext_rate', type=float,
    help='Default: %(default)s')
  parser.add_argument('-S', '--seed', type=int,
    help='Default: random.')

  args = parser.parse_args(argv[1:])

  if args.seed is None:
    seed = random.randint(0, 2**31-1)
    sys.stderr.write('seed: {}\n'.format(seed))
  else:
    seed = args.seed
  random.seed(seed)

  var_prob = 1/args.spacing

  next_var = args.spacing
  coord = 0
  chr_name = None
  for line in args.ref:
    if args.ref.name != chr_name:
      chr_name = args.ref.name
      if args.ref_name:
        chr_id = args.ref_name.split()[0]
        print('>'+args.ref_name)
      else:
        chr_id = args.ref.id
        print('>'+chr_name)
    end_coord = coord + len(line)
    new_line = line
    if args.random:
      n_vars = numpy.random.binomial(len(line), var_prob)
      for i in range(n_vars):
        vcoord = random.randint(coord+1, end_coord)
        vtype, alt = sim.make_mutation(args.indel_rate, args.ext_rate)
        var = {'coord':vcoord-coord-1, 'type':vtype, 'alt':alt}
        # sys.stderr.write('Adding var at {}: {} ({})\n'.format(vcoord, vtype, alt))
        new_line = sim.apply_mutation(var, new_line)
        if args.mutations:
          var['coord'] = vcoord
          sim.log_mutations(args.mutations, [var], '.', chr_id, '.', '.')
    else:
      if next_var <= end_coord:
        vtype, alt = sim.make_mutation(args.indel_rate, args.ext_rate)
        var = {'coord':next_var-coord-1, 'type':vtype, 'alt':alt}
        # sys.stderr.write('Adding var at {}: {} ({})\n'.format(next_var, vtype, alt))
        new_line = sim.apply_mutation(var, new_line)
        if args.mutations:
          var['coord'] = next_var
          sim.log_mutations(args.mutations, [var], '.', chr_id, '.', '.')
        next_var += args.spacing
    print(new_line)
    coord += len(line)


def fail(message):
  sys.stderr.write(message+"\n")
  sys.exit(1)

if __name__ == '__main__':
  sys.exit(main(sys.argv))