annotate windowSplitter.py @ 3:1011f028f09d draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
author devteam
date Wed, 03 Jun 2020 13:44:45 +0000
parents 4815516900d4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4815516900d4 Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
4815516900d4 Imported from capsule None
devteam
parents:
diff changeset
2 """
4815516900d4 Imported from capsule None
devteam
parents:
diff changeset
3 Split into windows.
4815516900d4 Imported from capsule None
devteam
parents:
diff changeset
4
4815516900d4 Imported from capsule None
devteam
parents:
diff changeset
5 usage: %prog input size out_file
4815516900d4 Imported from capsule None
devteam
parents:
diff changeset
6 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file
4815516900d4 Imported from capsule None
devteam
parents:
diff changeset
7 """
3
1011f028f09d "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
devteam
parents: 0
diff changeset
8 from __future__ import print_function
0
4815516900d4 Imported from capsule None
devteam
parents:
diff changeset
9
4815516900d4 Imported from capsule None
devteam
parents:
diff changeset
10 import sys
4815516900d4 Imported from capsule None
devteam
parents:
diff changeset
11
4815516900d4 Imported from capsule None
devteam
parents:
diff changeset
12 from bx.cookbook import doc_optparse
3
1011f028f09d "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
devteam
parents: 0
diff changeset
13
1011f028f09d "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
devteam
parents: 0
diff changeset
14
1011f028f09d "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
devteam
parents: 0
diff changeset
# Zero-based chrom, start, end and strand column indices of a BED file;
# returned by parse_cols_arg() when no columns argument is given.
BED_DEFAULT_COLS = (0, 1, 2, 5)
1011f028f09d "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
devteam
parents: 0
diff changeset
17
0
4815516900d4 Imported from capsule None
devteam
parents:
diff changeset
18
3
1011f028f09d "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
devteam
parents: 0
diff changeset
def parse_cols_arg(cols):
    """Convert a columns command line argument into zero-based indices.

    ``cols`` is a comma-separated string of one-based column numbers for
    chrom, start, end and strand.  A falsy value selects
    ``BED_DEFAULT_COLS``.

    When the string ends with a comma (no strand column was supplied, e.g.
    ``1,2,3,``) a ``0`` is appended before parsing, so the strand index
    comes out as ``-1``.

    Returns a list with one ``int`` per comma-separated field, or the
    ``BED_DEFAULT_COLS`` tuple when ``cols`` is falsy.  ``int()`` raises
    ``ValueError`` for a field that is not an integer.
    """
    if not cols:
        return BED_DEFAULT_COLS
    # A trailing comma means the strand column was left out.
    spec = cols + '0' if cols.endswith(',') else cols
    return [int(field) - 1 for field in spec.split(",")]
0
4815516900d4 Imported from capsule None
devteam
parents:
diff changeset
30
4815516900d4 Imported from capsule None
devteam
parents:
diff changeset
31
4815516900d4 Imported from capsule None
devteam
parents:
diff changeset
def _iter_windows(start, end, winsize, step):
    """Yield ``(window_start, window_end)`` pairs lying inside ``[start, end]``.

    Windows are ``winsize`` wide and consecutive windows begin ``step``
    apart.  At most ``(end - start) // step`` windows are produced, and a
    window is only yielded when it ends at or before ``end``.
    """
    for _ in range((end - start) // step):
        # Checked before yielding so that even the first window cannot
        # run past the end of the region.
        if start + winsize > end:
            break
        yield start, start + winsize
        start += step


def main():
    """Split each interval of the input file into fixed-size windows.

    Positional arguments (taken from the command line through
    ``doc_optparse``): input file, window size, output file, a sliding flag
    (``0`` means non-sliding) and the sliding offset.  An offset of ``0``
    forces non-sliding mode.

    For every non-empty, non-``#`` input line the start and end columns are
    replaced by each window's bounds and the line is written to the output
    file.  Lines whose start/end fields are missing or not integers are
    counted and reported on stdout instead of being written.

    Exits through ``sys.exit`` with a message when the arguments cannot be
    parsed or the window size is not positive.
    """
    # Parsing Command Line here
    options, args = doc_optparse.parse(__doc__)

    try:
        # The chrom and strand indices are not used below; unpacking four
        # names still rejects a columns argument of the wrong length.
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(options.cols)
        inp_file, winsize, out_file, makesliding, offset = args
        winsize = int(winsize)
        offset = int(offset)
        makesliding = int(makesliding)
    except Exception:
        sys.exit("Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset.")

    # A zero window size would divide by zero on every line and a negative
    # one would produce inverted windows; reject both up front.
    if winsize <= 0:
        sys.exit("Window size must be a positive integer.")

    if offset == 0:
        makesliding = 0
    # Distance between the starts of consecutive windows.
    step = winsize if makesliding == 0 else offset

    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None

    with open(out_file, 'w') as fo, open(inp_file) as fi:
        for i, line in enumerate(fi):
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            elems = line.split('\t')
            # Only the field parsing is guarded, so that errors while
            # writing the output are not mistaken for invalid input lines.
            try:
                start = int(elems[start_col_1])
                end = int(elems[end_col_1])
            except (ValueError, IndexError):
                skipped_lines += 1
                if invalid_line is None:
                    first_invalid_line = i + 1
                    invalid_line = line
                continue
            for win_start, win_end in _iter_windows(start, end, winsize, step):
                elems[start_col_1] = str(win_start)
                elems[end_col_1] = str(win_end)
                fo.write("%s\n" % '\t'.join(elems))

    if makesliding == 1:
        print('Window size=%d, Sliding=Yes, Offset=%d' % (winsize, offset))
    else:
        print('Window size=%d, Sliding=No' % (winsize))
    if skipped_lines > 0:
        print('Skipped %d invalid lines starting with #%d: "%s"' % (skipped_lines, first_invalid_line, invalid_line))


if __name__ == "__main__":
    main()