Mercurial > repos > scottx611x > cooler_convert
annotate recursive_agg_onefile.py @ 37:8e5ca9b2b63d draft
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
| author | scottx611x |
|---|---|
| date | Mon, 21 Nov 2016 17:33:22 -0500 |
| parents | 79176387b351 |
| children | 1daf2cd88f17 |
| rev | line source |
|---|---|
|
35
7ede301a13f8
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
34
diff
changeset
|
1 from __future__ import division, print_function |
|
33
51af070be3e5
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
22
diff
changeset
|
2 import sys |
|
36
79176387b351
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
35
diff
changeset
|
3 |
|
33
51af070be3e5
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
22
diff
changeset
|
4 print(sys.executable) |
|
51af070be3e5
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
22
diff
changeset
|
5 |
|
36
79176387b351
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
35
diff
changeset
|
6 |
|
79176387b351
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
35
diff
changeset
|
7 import pip |
|
79176387b351
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
35
diff
changeset
|
8 installed_packages = pip.get_installed_distributions() |
|
79176387b351
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
35
diff
changeset
|
9 installed_packages_list = sorted(["%s==%s" % (i.key, i.version) for i in installed_packages]) |
|
79176387b351
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
35
diff
changeset
|
10 print(installed_packages_list) |
|
37
8e5ca9b2b63d
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
36
diff
changeset
|
11 print(pip.main(["--version"])) |
|
36
79176387b351
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
35
diff
changeset
|
12 |
|
79176387b351
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
35
diff
changeset
|
13 |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
14 from multiprocessing import Pool |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
15 import argparse |
|
22
90005893d112
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
21
diff
changeset
|
16 import warnings |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
17 |
|
5
4543c0fdd563
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
4
diff
changeset
|
18 import numpy as np |
|
10
f0d4e43f95b5
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
5
diff
changeset
|
19 import cooler |
|
5
4543c0fdd563
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
4
diff
changeset
|
20 from cooler.io import CoolerAggregator |
|
4543c0fdd563
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
4
diff
changeset
|
21 import cooler.ice |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
22 import h5py |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
23 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
24 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
25 FACTOR = 2 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
26 TILESIZE = 256 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
27 N_CPU = 8 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
28 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
29 def main(infile, outfile, chunksize): |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
30 c = cooler.Cooler(infile) |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
31 binsize = c.info['bin-size'] |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
32 chromtable = c.chroms()[:] |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
33 chromsizes = chromtable.set_index('name')['length'] |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
34 chroms = chromtable['name'].values |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
35 lengths = chromtable['length'].values |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
36 total_length = np.sum(chromsizes.values) |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
37 n_tiles = total_length / binsize / TILESIZE |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
38 n_zooms = int(np.ceil(np.log2(n_tiles))) |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
39 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
40 print( |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
41 "Copying base matrix to level {0} and producing {0} zoom levels starting from 0...".format(n_zooms), |
|
21
202ef48959d3
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
20
diff
changeset
|
42 file=sys.stdout |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
43 ) |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
44 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
45 # transfer base matrix |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
46 with h5py.File(outfile, 'w') as dest, \ |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
47 h5py.File(infile, 'r') as src: |
|
20
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
48 zoomLevel = str(n_zooms) |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
49 src.copy('/', dest, zoomLevel) |
|
21
202ef48959d3
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
20
diff
changeset
|
50 print(zoomLevel, file=sys.stdout) |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
51 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
52 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
53 # produce aggregations |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
54 with h5py.File(outfile, 'r+') as f: |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
55 grp = f[str(n_zooms)] |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
56 c = cooler.Cooler(grp) |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
57 binsize = cooler.info(grp)['bin-size'] |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
58 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
59 for i in range(n_zooms - 1, -1, -1): |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
60 zoomLevel = str(i) |
|
19
70d6350f982d
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
18
diff
changeset
|
61 |
|
20
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
62 # aggregate |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
63 new_binsize = binsize * FACTOR |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
64 new_bins = cooler.util.binnify(chromsizes, new_binsize) |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
65 |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
66 reader = CoolerAggregator(c, new_bins, chunksize) |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
67 |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
68 grp = f.create_group(zoomLevel) |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
69 f.attrs[zoomLevel] = new_binsize |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
70 cooler.io.create(grp, chroms, lengths, new_bins, reader) |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
71 |
|
20
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
72 # balance |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
73 #with Pool(N_CPU) as pool: |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
74 too_close = 20000 # for HindIII |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
75 ignore_diags = max(int(np.ceil(too_close / new_binsize)), 3) |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
76 |
|
20
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
77 bias = cooler.ice.iterative_correction( |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
78 f, zoomLevel, |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
79 chunksize=chunksize, |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
80 min_nnz=10, |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
81 mad_max=3, |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
82 ignore_diags=ignore_diags, |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
83 map=map) |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
84 h5opts = dict(compression='gzip', compression_opts=6) |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
85 grp['bins'].create_dataset('weight', data=bias, **h5opts) |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
86 |
|
21
202ef48959d3
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
20
diff
changeset
|
87 print(zoomLevel, file=sys.stdout) |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
88 |
|
20
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
89 c = cooler.Cooler(grp) |
|
797818b4e2f6
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
19
diff
changeset
|
90 binsize = new_binsize |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
91 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
92 if __name__ == '__main__': |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
93 parser = argparse.ArgumentParser( |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
94 description="Recursively aggregate a single resolution cooler file into a multi-resolution file.") |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
95 parser.add_argument( |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
96 "cooler_file", |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
97 help="Cooler file", |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
98 metavar="COOLER_PATH") |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
99 parser.add_argument( |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
100 "--out", "-o", |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
101 help="Output text file") |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
102 args = vars(parser.parse_args()) |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
103 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
104 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
105 infile = args['cooler_file'] |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
106 if args['out'] is None: |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
107 outfile = infile.replace('.cool', '.multires.cool') |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
108 else: |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
109 outfile = args['out'] |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
110 |
|
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
111 chunksize = int(1e6) |
|
22
90005893d112
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
21
diff
changeset
|
112 with warnings.catch_warnings(): |
|
90005893d112
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
21
diff
changeset
|
113 warnings.simplefilter("ignore") |
|
90005893d112
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
21
diff
changeset
|
114 main(infile, outfile, chunksize) |
|
0
c7d624661e8a
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
115 |
