Mercurial > repos > scottx611x > cooler_convert
annotate recursive_agg_onefile.py @ 42:ff10e8784d4e draft default tip
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
| author | scottx611x |
|---|---|
| date | Tue, 22 Nov 2016 14:21:59 -0500 |
| parents | 1daf2cd88f17 |
| children |
| rev | line source |
|---|---|
|
35
7ede301a13f8
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
34
diff
changeset
|
1 from __future__ import division, print_function |
|
33
51af070be3e5
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
22
diff
changeset
|
2 import sys |
|
36
79176387b351
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
35
diff
changeset
|
3 |
|
38
1daf2cd88f17
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
37
diff
changeset
|
4 sys.path.append("/usr/local/lib/python2.7/dist-packages") |
|
36
79176387b351
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
35
diff
changeset
|
5 |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
6 from multiprocessing import Pool |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
7 import argparse |
|
22
90005893d112
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
21
diff
changeset
|
8 import warnings |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
9 |
|
5
4543c0fdd563
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
4
diff
changeset
|
10 import numpy as np |
|
10
f0d4e43f95b5
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
5
diff
changeset
|
11 import cooler |
|
5
4543c0fdd563
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
4
diff
changeset
|
12 from cooler.io import CoolerAggregator |
|
4543c0fdd563
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
4
diff
changeset
|
13 import cooler.ice |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
14 import h5py |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
15 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
16 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
17 FACTOR = 2 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
18 TILESIZE = 256 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
19 N_CPU = 8 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
20 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
21 def main(infile, outfile, chunksize): |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
22 c = cooler.Cooler(infile) |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
23 binsize = c.info['bin-size'] |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
24 chromtable = c.chroms()[:] |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
25 chromsizes = chromtable.set_index('name')['length'] |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
26 chroms = chromtable['name'].values |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
27 lengths = chromtable['length'].values |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
28 total_length = np.sum(chromsizes.values) |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
29 n_tiles = total_length / binsize / TILESIZE |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
30 n_zooms = int(np.ceil(np.log2(n_tiles))) |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
31 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
32 print( |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
33 "Copying base matrix to level {0} and producing {0} zoom levels starting from 0...".format(n_zooms), |
|
21
202ef48959d3
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
20
diff
changeset
|
34 file=sys.stdout |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
35 ) |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
36 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
37 # transfer base matrix |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
38 with h5py.File(outfile, 'w') as dest, \ |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
39 h5py.File(infile, 'r') as src: |
|
20
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
40 zoomLevel = str(n_zooms) |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
41 src.copy('/', dest, zoomLevel) |
|
21
202ef48959d3
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
20
diff
changeset
|
42 print(zoomLevel, file=sys.stdout) |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
43 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
44 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
45 # produce aggregations |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
46 with h5py.File(outfile, 'r+') as f: |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
47 grp = f[str(n_zooms)] |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
48 c = cooler.Cooler(grp) |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
49 binsize = cooler.info(grp)['bin-size'] |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
50 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
51 for i in range(n_zooms - 1, -1, -1): |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
52 zoomLevel = str(i) |
|
19
70d6350f982d
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
18
diff
changeset
|
53 |
|
20
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
54 # aggregate |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
55 new_binsize = binsize * FACTOR |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
56 new_bins = cooler.util.binnify(chromsizes, new_binsize) |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
57 |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
58 reader = CoolerAggregator(c, new_bins, chunksize) |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
59 |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
60 grp = f.create_group(zoomLevel) |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
61 f.attrs[zoomLevel] = new_binsize |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
62 cooler.io.create(grp, chroms, lengths, new_bins, reader) |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
63 |
|
20
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
64 # balance |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
65 #with Pool(N_CPU) as pool: |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
66 too_close = 20000 # for HindIII |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
67 ignore_diags = max(int(np.ceil(too_close / new_binsize)), 3) |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
68 |
|
20
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
69 bias = cooler.ice.iterative_correction( |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
70 f, zoomLevel, |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
71 chunksize=chunksize, |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
72 min_nnz=10, |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
73 mad_max=3, |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
74 ignore_diags=ignore_diags, |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
75 map=map) |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
76 h5opts = dict(compression='gzip', compression_opts=6) |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
77 grp['bins'].create_dataset('weight', data=bias, **h5opts) |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
78 |
|
21
202ef48959d3
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
20
diff
changeset
|
79 print(zoomLevel, file=sys.stdout) |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
80 |
|
20
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
81 c = cooler.Cooler(grp) |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
82 binsize = new_binsize |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
83 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
84 if __name__ == '__main__': |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
85 parser = argparse.ArgumentParser( |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
86 description="Recursively aggregate a single resolution cooler file into a multi-resolution file.") |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
87 parser.add_argument( |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
88 "cooler_file", |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
89 help="Cooler file", |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
90 metavar="COOLER_PATH") |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
91 parser.add_argument( |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
92 "--out", "-o", |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
93 help="Output text file") |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
94 args = vars(parser.parse_args()) |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
95 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
96 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
97 infile = args['cooler_file'] |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
98 if args['out'] is None: |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
99 outfile = infile.replace('.cool', '.multires.cool') |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
100 else: |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
101 outfile = args['out'] |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
102 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
103 chunksize = int(1e6) |
|
22
90005893d112
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
21
diff
changeset
|
104 with warnings.catch_warnings(): |
|
90005893d112
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
21
diff
changeset
|
105 warnings.simplefilter("ignore") |
|
90005893d112
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
21
diff
changeset
|
106 main(infile, outfile, chunksize) |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
107 |
