Mercurial > repos > test-svm > kmersvm_test
comparison kmersvm/scripts/split_genome.py @ 0:66088269713e draft
Uploaded all files tracked by git
| author | test-svm |
|---|---|
| date | Sun, 05 Aug 2012 15:32:16 -0400 |
| parents | |
| children | e8dcc2ed0f9f |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:66088269713e |
|---|---|
| 1 import os | |
| 2 import os.path | |
| 3 import sys | |
| 4 import optparse | |
| 5 import math | |
| 6 import struct | |
| 7 import re | |
| 8 from libkmersvm import * | |
| 9 | |
def split(bed_file, options):
    """Write overlapping fixed-size windows for every region of a BED file.

    Each non-blank input line must hold at least three tab-separated
    columns: a sequence name, an integer start coordinate, and an integer
    upper bound for the region (named ``length`` in earlier versions of this
    script).  Extra columns are ignored.

    For every region, windows of ``options.size`` bases are emitted starting
    at the region start and advancing by ``options.incr``.  The final window
    is clipped to the region bound.  Windows are written to
    ``options.output`` as ``name<TAB>start<TAB>end`` lines.

    Args:
        bed_file: path of the input BED file.
        options: object exposing ``size`` (window width), ``incr`` (step
            between consecutive window starts) and ``output`` (path of the
            BED file to write).

    Raises:
        ValueError: if ``size`` or ``incr`` is not positive, if a line has
            fewer than three columns, or if a coordinate is not an integer.
    """
    size = options.size
    incr = options.incr
    if size <= 0:
        raise ValueError("chunk size must be positive, got %r" % (size,))
    if incr <= 0:
        # A non-positive step never advances the window, so the loop below
        # would write the same row forever.
        raise ValueError("increment must be positive, got %r" % (incr,))

    row = "%s\t%d\t%d\n"

    # Text mode keeps lines as str on both Python 2 and 3; the input is
    # opened first so a missing input file does not truncate the output.
    with open(bed_file, 'r') as bed_in, open(options.output, 'w') as split_f:
        for line_no, line in enumerate(bed_in, 1):
            if not line.strip():
                continue
            fields = line.rstrip('\r\n').split('\t')
            if len(fields) < 3:
                raise ValueError(
                    "%s:%d: expected at least 3 tab-separated columns"
                    % (bed_file, line_no))
            name = fields[0]
            start = int(fields[1])
            region_end = int(fields[2])
            end = start + size

            while True:
                # Clip so a region shorter than one window never yields
                # coordinates past its bound.
                clipped = min(end, region_end)
                if start < clipped:
                    split_f.write(row % (name, start, clipped))
                if end + incr >= region_end:
                    # The next window would reach or pass the bound: emit it
                    # clipped to the bound, unless it would be empty.
                    start += incr
                    if start < region_end:
                        split_f.write(row % (name, start, region_end))
                    break
                start += incr
                end += incr
| 34 | |
| 35 | |
def main(argv=sys.argv):
    """Parse the command line and split the given BED file into windows.

    Args:
        argv: full argument vector, program name first.  Defaults to
            ``sys.argv``.

    Exits with a usage error (status 2) when no BED file is given.
    """
    usage = "usage: %prog <bed_file>"
    parser = optparse.OptionParser(usage=usage)

    parser.add_option("-s", dest="size", type="int",
                      default=1000, help="set chunk size")
    # The value is the step between consecutive chunk starts, not the
    # overlap between chunks, so the help text says so.
    parser.add_option("-i", dest="incr", type="int",
                      default=500, help="set increment between chunk starts")
    parser.add_option("-o", dest="output", default="split_genome_output.bed",
                      help="output BED file (default is split_genome_output.bed)")

    # Honour the argv parameter instead of always re-reading sys.argv.
    (options, args) = parser.parse_args(argv[1:])

    if not args:
        parser.error("missing required <bed_file> argument")

    bed_file = args[0]

    split(bed_file, options)
| 52 | |
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
