comparison kmersvm/scripts/split_genome.py @ 5:f99b5099ea55 draft

Uploaded
author test-svm
date Sun, 05 Aug 2012 16:50:57 -0400
parents
children 3b0c30b3baf1
comparison
equal deleted inserted replaced
4:f2130156fd5d 5:f99b5099ea55
1 import os
2 import os.path
3 import sys
4 import optparse
5 import math
6 import re
7 from libkmersvm import *
8
def _iter_windows(start, stop, size, incr):
    """Yield ``(start, end)`` windows of ``size`` advancing by ``incr``.

    Full-size windows are yielded while they end strictly before ``stop``.
    The last window is clamped so that it ends exactly at ``stop``.  Nothing
    is yielded when the window start has already reached ``stop``, so an
    empty or inverted interval is never produced.
    """
    end = start + size
    while end < stop:
        yield start, end
        start += incr
        end += incr
    if start < stop:
        yield start, stop


def split(bed_file, options):
    """Split each region of a tab-separated interval file into windows.

    Every non-blank line of ``bed_file`` must hold at least three
    tab-separated columns: a name, an integer start and an integer end
    coordinate.  Any further columns are ignored.  For each region,
    windows of ``options.size`` are written to ``options.output`` as
    ``name<TAB>start<TAB>end`` lines; consecutive windows start
    ``options.incr`` apart and the last window of a region is truncated
    at the region end.

    Args:
        bed_file: path of the input file.
        options: object providing the attributes ``output`` (path of the
            file to write), ``size`` (window length) and ``incr``
            (distance between consecutive window starts).

    Raises:
        ValueError: if ``size`` or ``incr`` is not positive, or if a line
            has fewer than three columns or non-integer coordinates.
    """
    size = options.size
    incr = options.incr
    # A non-positive increment would never reach the region end.
    if incr <= 0:
        raise ValueError("incr must be a positive integer, got %r" % (incr,))
    if size <= 0:
        raise ValueError("size must be a positive integer, got %r" % (size,))

    # Text mode keeps str-based splitting valid on Python 2 and Python 3;
    # the with-blocks guarantee both files are flushed and closed.
    with open(bed_file, 'r') as regions:
        with open(options.output, 'w') as split_f:
            for lineno, line in enumerate(regions, 1):
                if not line.strip():
                    continue  # tolerate blank lines (e.g. a trailing newline)
                fields = line.split('\t')
                if len(fields) < 3:
                    raise ValueError(
                        "%s:%d: expected at least 3 tab-separated columns"
                        % (bed_file, lineno))
                name = fields[0]
                # int() ignores surrounding whitespace, including the newline
                # left on the last column.
                start = int(fields[1])
                stop = int(fields[2])
                for win_start, win_end in _iter_windows(start, stop,
                                                        size, incr):
                    split_f.write("%s\t%d\t%d\n" % (name, win_start, win_end))
33
34
def main(argv=sys.argv):
    """Command-line entry point: parse options and run ``split``.

    Args:
        argv: full argument vector, program name first.  Only
            ``argv[1:]`` is parsed.

    Exits through ``parser.error`` (status 2, usage message on stderr)
    when the positional input-file argument is missing.
    """
    usage = "usage: %prog <bed_file>"
    parser = optparse.OptionParser(usage=usage)

    parser.add_option("-s", dest="size", type="int",
                      default=1000, help="set chunk size")
    # incr is the distance between consecutive chunk starts, not the overlap.
    parser.add_option("-i", dest="incr", type="int",
                      default=500,
                      help="set increment between chunk starts "
                           "(chunks overlap by size - increment)")
    parser.add_option("-o", dest="output", default="split_genome_output.bed",
                      help="output BED file (default is split_genome_output.bed)")

    # Parse the vector that was actually passed in rather than always
    # falling back to sys.argv.
    (options, args) = parser.parse_args(argv[1:])

    if not args:
        parser.error("missing required argument: <bed_file>")

    bed_file = args[0]

    split(bed_file, options)
51
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()