annotate kmersvm/scripts/split_genome.py @ 0:66088269713e draft

Uploaded all files tracked by git
author test-svm
date Sun, 05 Aug 2012 15:32:16 -0400
parents
children e8dcc2ed0f9f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
66088269713e Uploaded all files tracked by git
test-svm
parents:
diff changeset
1 import os
66088269713e Uploaded all files tracked by git
test-svm
parents:
diff changeset
2 import os.path
66088269713e Uploaded all files tracked by git
test-svm
parents:
diff changeset
3 import sys
66088269713e Uploaded all files tracked by git
test-svm
parents:
diff changeset
4 import optparse
66088269713e Uploaded all files tracked by git
test-svm
parents:
diff changeset
5 import math
66088269713e Uploaded all files tracked by git
test-svm
parents:
diff changeset
6 import struct
66088269713e Uploaded all files tracked by git
test-svm
parents:
diff changeset
7 import re
66088269713e Uploaded all files tracked by git
test-svm
parents:
diff changeset
8 from libkmersvm import *
66088269713e Uploaded all files tracked by git
test-svm
parents:
diff changeset
9
66088269713e Uploaded all files tracked by git
test-svm
parents:
diff changeset
def split(bed_file, options):
    """Write overlapping fixed-size windows for every region in *bed_file*.

    Each non-blank input line must hold exactly three tab-separated
    fields: a sequence name, an integer start coordinate and an integer
    that this function treats as the end coordinate of the region (the
    original code calls it ``length``; the two coincide when start is 0).

    For each region, windows of ``options.size`` bases are emitted every
    ``options.incr`` bases.  Once the next full step would reach or pass
    the region end, one final window running up to the region end is
    written.  Output lines have the form ``name<TAB>start<TAB>end``.

    Args:
        bed_file: path of the tab-separated input file.
        options: object providing ``output`` (path of the file to write),
            ``size`` (window length) and ``incr`` (step between window
            starts).

    Raises:
        ValueError: if ``size`` or ``incr`` is not positive, or if a line
            does not contain three fields with integer coordinates.
    """
    size = options.size
    incr = options.incr

    # A non-positive step never reaches the region end, so the loop below
    # would run (and write) forever.  Reject bad values before the output
    # file is truncated.
    if size <= 0:
        raise ValueError("chunk size must be positive, got %r" % (size,))
    if incr <= 0:
        raise ValueError("increment must be positive, got %r" % (incr,))

    # Text mode keeps the str-based split working on Python 2 and 3 alike;
    # the context managers guarantee both handles are closed on any exit.
    with open(bed_file, 'r') as bed_in:
        with open(options.output, 'w') as split_f:
            for line in bed_in:
                if not line.strip():
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue

                (name, start, length) = line.split('\t')
                start = int(start)
                length = int(length)

                # Never let the first window run past the region end.
                end = min(start + size, length)
                if start >= end:
                    # Empty or inverted region: nothing to emit.
                    continue

                while True:
                    split_f.write("%s\t%d\t%d\n" % (name, start, end))
                    if end + incr >= length:
                        # Last step: emit the tail window ending exactly
                        # at the region end, unless it would be empty.
                        start += incr
                        if start < length:
                            split_f.write(
                                "%s\t%d\t%d\n" % (name, start, length))
                        break
                    start += incr
                    end += incr
66088269713e Uploaded all files tracked by git
test-svm
parents:
diff changeset
34
66088269713e Uploaded all files tracked by git
test-svm
parents:
diff changeset
35
66088269713e Uploaded all files tracked by git
test-svm
parents:
diff changeset
def main(argv=sys.argv):
    """Parse the command line and split the given file into windows.

    Args:
        argv: full argument vector, program name first.  Defaults to
            ``sys.argv``.

    Options:
        -s  chunk size (default 1000)
        -i  increment between consecutive chunk starts (default 500)
        -o  output file (default ``split_genome_output.bed``)

    Exits with a usage error (status 2) unless exactly one positional
    argument, the input file, is supplied.
    """
    usage = "usage: %prog <bed_file>"
    parser = optparse.OptionParser(usage=usage)

    parser.add_option("-s", dest="size", type="int",
                      default=1000, help="set chunk size")
    # The value is the step between window starts; the resulting overlap
    # between neighbouring windows is size minus increment.
    parser.add_option("-i", dest="incr", type="int",
                      default=500,
                      help="set increment between chunk starts "
                           "(overlap is chunk size minus increment)")
    parser.add_option("-o", dest="output", default="split_genome_output.bed",
                      help="output BED file (default is split_genome_output.bed)")

    # Honour the argv passed by the caller instead of always reading
    # sys.argv; argv[0] is the program name and is not an argument.
    (options, args) = parser.parse_args(argv[1:])

    if len(args) != 1:
        # parser.error prints the usage line and exits with status 2.
        parser.error("expected exactly one input file argument")

    bed_file = args[0]

    split(bed_file, options)
66088269713e Uploaded all files tracked by git
test-svm
parents:
diff changeset
52
66088269713e Uploaded all files tracked by git
test-svm
parents:
diff changeset
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()