Mercurial > repos > devteam > windowsplitter
annotate windowSplitter.py @ 3:1011f028f09d draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
author | devteam |
---|---|
date | Wed, 03 Jun 2020 13:44:45 +0000 |
parents | 4815516900d4 |
children |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2 """ | |
3 Split into windows. | |
4 | |
5 usage: %prog input size out_file | |
6 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file | |
7 """ | |
3
1011f028f09d
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
devteam
parents:
0
diff
changeset
|
8 from __future__ import print_function |
0 | 9 |
10 import sys | |
11 | |
12 from bx.cookbook import doc_optparse | |
3
1011f028f09d
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
devteam
parents:
0
diff
changeset
|
13 |
1011f028f09d
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
devteam
parents:
0
diff
changeset
|
14 |
1011f028f09d
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
devteam
parents:
0
diff
changeset
|
# Zero-based (chrom, start, end, strand) column indices used when no
# explicit column specification is given; these match a BED file.
BED_DEFAULT_COLS = (0, 1, 2, 5)
1011f028f09d
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
devteam
parents:
0
diff
changeset
|
17 |
0 | 18 |
3
1011f028f09d
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
devteam
parents:
0
diff
changeset
|
def parse_cols_arg(cols):
    """Translate a 1-based column specification into 0-based indices.

    ``cols`` is a comma-separated string such as ``"1,2,3,6"`` naming the
    chrom, start, end and strand columns.  A specification ending in a
    comma (``"1,2,3,"``) means no strand column was supplied; the missing
    entry is filled in as ``0`` and therefore comes back as ``-1``.

    Returns a list of ints when ``cols`` is non-empty, otherwise the
    module-level ``BED_DEFAULT_COLS``.  Raises ``ValueError`` if any entry
    is not an integer.
    """
    if not cols:
        return BED_DEFAULT_COLS
    # Pad a dangling comma so that every comma-separated field is numeric.
    spec = (cols + '0') if cols.endswith(',') else cols
    return [int(field) - 1 for field in spec.split(",")]
0 | 30 |
31 | |
def _iter_windows(start, end, winsize, step):
    """Yield ``(window_start, window_end)`` pairs of width ``winsize``.

    Windows begin at ``start`` and advance by ``step``.  At most
    ``(end - start) // step`` windows are produced, and a window is only
    yielded when it lies entirely inside ``[start, end]``.  Nothing is
    yielded for a non-positive ``step``.
    """
    if step <= 0:
        return
    for _ in range((end - start) // step):
        # Check *before* emitting: when stepping by an offset smaller than
        # the window size, even the first window may not fit in the region.
        if start + winsize > end:
            break
        yield start, start + winsize
        start += step


def main():
    """Split each interval of the input file into fixed-size windows.

    Positional command-line arguments: input file, window size, output
    file, sliding flag (0 or non-zero) and sliding offset.  The ``--cols``
    option selects the chrom/start/end/strand columns (1-based).

    For every non-blank, non-comment input line the start/end columns are
    rewritten once per window and the line is written to the output file.
    Lines whose start/end columns are missing or non-numeric are counted
    and reported on stdout together with a summary of the settings.
    Exits with an error message if the arguments cannot be parsed or the
    window size is not positive.
    """
    # Parsing Command Line here
    options, args = doc_optparse.parse(__doc__)

    try:
        # Only the start and end columns are used, but unpacking all four
        # keeps the check that a complete column specification was given.
        _chr_col, start_col, end_col, _strand_col = parse_cols_arg(options.cols)
        inp_file, winsize, out_file, makesliding, offset = args
        winsize = int(winsize)
        offset = int(offset)
        makesliding = int(makesliding)
    except Exception:
        sys.exit("Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset.")

    if winsize <= 0:
        # A zero size used to surface as every line being reported invalid.
        sys.exit("Window size must be a positive integer.")

    # A zero offset cannot slide, so fall back to adjacent windows.
    sliding = makesliding != 0 and offset != 0
    step = offset if sliding else winsize

    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None

    with open(out_file, 'w') as fo, open(inp_file) as fi:
        for i, line in enumerate(fi):
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            elems = line.split('\t')
            # Keep the try body minimal so that only malformed coordinates
            # count as invalid lines; I/O errors propagate normally.
            try:
                start = int(elems[start_col])
                end = int(elems[end_col])
            except (ValueError, IndexError):
                skipped_lines += 1
                if not invalid_line:
                    first_invalid_line = i + 1
                    invalid_line = line
                continue
            for win_start, win_end in _iter_windows(start, end, winsize, step):
                elems[start_col] = str(win_start)
                elems[end_col] = str(win_end)
                fo.write("%s\n" % '\t'.join(elems))

    if sliding:
        print('Window size=%d, Sliding=Yes, Offset=%d' % (winsize, offset))
    else:
        print('Window size=%d, Sliding=No' % (winsize))
    if skipped_lines > 0:
        print('Skipped %d invalid lines starting with #%d: "%s"' % (skipped_lines, first_invalid_line, invalid_line))
0 | 87 |
88 | |
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()