Mercurial > repos > test-svm > kmersvm_test
comparison kmersvm/scripts/split_genome.py @ 5:f99b5099ea55 draft
Uploaded
| author | test-svm |
|---|---|
| date | Sun, 05 Aug 2012 16:50:57 -0400 |
| parents | |
| children | 3b0c30b3baf1 |
comparison
equal
deleted
inserted
replaced
| 4:f2130156fd5d | 5:f99b5099ea55 |
|---|---|
| 1 import os | |
| 2 import os.path | |
| 3 import sys | |
| 4 import optparse | |
| 5 import math | |
| 6 import re | |
| 7 from libkmersvm import * | |
| 8 | |
def split(bed_file, options):
    """Write overlapping fixed-size windows for each region of a BED-like file.

    Every non-blank line of ``bed_file`` must carry at least three
    tab-separated fields: a name, an integer start and an integer end
    coordinate.  Any further columns are ignored.  For each region, windows
    of ``options.size`` positions are emitted, each one starting
    ``options.incr`` positions after the previous one.  The final window is
    truncated so that it ends exactly at the region end.

    Args:
        bed_file: path of the tab-separated input file.
        options: object exposing ``output`` (path of the file to write),
            ``size`` (window width) and ``incr`` (distance between the
            starts of consecutive windows).

    Raises:
        ValueError: if ``size`` or ``incr`` is not positive, or if a line
            has fewer than three fields or non-integer coordinates.
    """
    incr = options.incr
    size = options.size
    # A non-positive increment would never reach the region end and the
    # loop below would write windows forever.
    if incr <= 0:
        raise ValueError("incr must be a positive integer")
    if size <= 0:
        raise ValueError("size must be a positive integer")

    # Text mode keeps the '\t' split valid on both Python 2 and Python 3;
    # the ``with`` blocks guarantee both handles are flushed and closed.
    with open(bed_file, 'r') as bed_in:
        with open(options.output, 'w') as split_f:
            for line in bed_in:
                if not line.strip():
                    continue  # tolerate blank / trailing empty lines

                name, start, region_end = line.rstrip('\r\n').split('\t')[:3]
                start = int(start)
                region_end = int(region_end)

                if start >= region_end:
                    continue  # empty or inverted region: nothing to tile

                end = start + size
                if end >= region_end:
                    # The region fits inside a single window: clamp it
                    # instead of emitting an out-of-bounds window followed
                    # by an empty one.
                    split_f.write("%s\t%d\t%d\n" % (name, start, region_end))
                    continue

                # Full-size windows while one more step stays strictly
                # inside the region.
                while end + incr < region_end:
                    split_f.write("%s\t%d\t%d\n" % (name, start, end))
                    start += incr
                    end += incr

                # Last full-size window, then the tail window clamped to
                # the region end.
                split_f.write("%s\t%d\t%d\n" % (name, start, end))
                split_f.write("%s\t%d\t%d\n" % (name, start + incr, region_end))
| 33 | |
| 34 | |
def main(argv=sys.argv):
    """Parse the command line and split the given BED file into windows.

    Args:
        argv: full argument vector, program name first.  Everything after
            the first element is handed to the option parser.

    Options:
        -s  window ("chunk") size, default 1000.
        -i  increment between consecutive window starts, default 500.
        -o  output file path, default ``split_genome_output.bed``.

    Exits through ``parser.error`` (usage message, status 2) when the
    positional <bed_file> argument is missing.
    """
    usage = "usage: %prog <bed_file>"
    parser = optparse.OptionParser(usage=usage)

    parser.add_option("-s", dest="size", type="int",
                      default=1000, help="set chunk size")
    parser.add_option("-i", dest="incr", type="int",
                      default=500, help="set overlap size")
    parser.add_option("-o", dest="output", default="split_genome_output.bed",
                      help="output BED file (default is split_genome_output.bed)")

    # Honour the argv parameter instead of silently re-reading sys.argv.
    (options, args) = parser.parse_args(argv[1:])

    if not args:
        # Report a usage error rather than failing with a bare IndexError.
        parser.error("missing required <bed_file> argument")

    bed_file = args[0]

    split(bed_file, options)


if __name__ == "__main__":
    main()
