|
0
|
1 import os
|
|
|
2 import os.path
|
|
|
3 import sys
|
|
|
4 import optparse
|
|
|
5 import math
|
|
|
6 import struct
|
|
|
7 import re
|
|
|
8 from libkmersvm import *
|
|
|
9
|
|
|
def split(bed_file, options):
    """Tile every region of a tab-separated file into overlapping windows.

    Each non-blank line of ``bed_file`` must contain exactly three
    tab-separated fields: a sequence name, an integer start coordinate and
    an integer region end (the value is compared directly against window
    end coordinates).  For each line, windows of ``options.size`` bases are
    written to ``options.output`` as ``name<TAB>start<TAB>end`` lines, the
    window start advancing by ``options.incr`` each time.  A window is
    clipped so it never extends past the region end, and no empty or
    inverted window is written.

    Args:
        bed_file: path of the input file.
        options: object exposing ``size`` and ``incr`` (ints) and
            ``output`` (path of the file to write).

    Raises:
        ValueError: if ``size`` or ``incr`` is not positive, if a line does
            not have exactly three fields, or if a coordinate is not an
            integer.
    """
    size = options.size
    incr = options.incr
    # A non-positive step would never reach the region end (endless loop),
    # and a non-positive size would only produce empty/inverted windows.
    if size <= 0 or incr <= 0:
        raise ValueError("size and incr must be positive integers")

    # Text mode so the '\t' separator matches the line type on Python 3 as
    # well; the context managers close both files even if a line is bad.
    with open(bed_file, 'r') as bed_in, open(options.output, 'w') as split_f:
        for line in bed_in:
            if not line.strip():
                # Tolerate blank lines (commonly a trailing one at EOF).
                continue

            name, start, length = line.split('\t')
            start = int(start)
            length = int(length)

            while start < length:
                # Clip the window to the region end.
                end = min(start + size, length)
                split_f.write("%s\t%d\t%d\n" % (name, start, end))
                if end == length:
                    # The region end has been reached; nothing left to tile.
                    break
                start += incr
|
|
|
34
|
|
|
35
|
|
|
def main(argv=sys.argv):
    """Command-line entry point: parse the options and run ``split``.

    Args:
        argv: full argument vector with the program name at index 0
            (defaults to ``sys.argv``).  Options recognised are ``-s``
            (window size), ``-i`` (step between window starts) and ``-o``
            (output path); the first positional argument is the input file.

    Exits with a usage error (status 2) when no input file is given.
    """
    usage = "usage: %prog <bed_file>"
    parser = optparse.OptionParser(usage=usage)

    parser.add_option("-s", dest="size", type="int",
                      default=1000, help="set chunk size")
    # The value is the distance between consecutive chunk starts, so the
    # overlap between neighbouring chunks is size minus this step.
    parser.add_option("-i", dest="incr", type="int",
                      default=500,
                      help="set step between chunk starts "
                           "(overlap is chunk size minus step)")
    parser.add_option("-o", dest="output", default="split_genome_output.bed",
                      help="output BED file (default is split_genome_output.bed)")

    # Parse the vector that was passed in rather than silently falling
    # back to sys.argv, so the argv parameter is actually honoured.
    (options, args) = parser.parse_args(argv[1:])

    if not args:
        # Report a proper usage message instead of an IndexError traceback.
        parser.error("missing <bed_file> argument")

    bed_file = args[0]

    split(bed_file, options)
|
|
|
52
|
|
|
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
|