Mercurial > repos > scottx611x > multires_cooler_convert
annotate recursive_agg_onefile.py @ 0:a80d960a253b draft
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
| author | scottx611x |
|---|---|
| date | Tue, 22 Nov 2016 16:39:51 -0500 |
| parents | |
| children |
| rev | line source |
|---|---|
|
0
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
1 from __future__ import division, print_function |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
2 import sys |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
3 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
4 from multiprocessing import Pool |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
5 import argparse |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
6 import warnings |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
7 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
8 import numpy as np |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
9 import cooler |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
10 from cooler.io import CoolerAggregator |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
11 import cooler.ice |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
12 import h5py |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
13 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
14 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
15 FACTOR = 2 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
16 TILESIZE = 256 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
17 N_CPU = 8 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
18 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
19 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
20 def main(infile, outfile, chunksize): |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
21 c = cooler.Cooler(infile) |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
22 binsize = c.info['bin-size'] |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
23 chromtable = c.chroms()[:] |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
24 chromsizes = chromtable.set_index('name')['length'] |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
25 chroms = chromtable['name'].values |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
26 lengths = chromtable['length'].values |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
27 total_length = np.sum(chromsizes.values) |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
28 n_tiles = total_length / binsize / TILESIZE |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
29 n_zooms = int(np.ceil(np.log2(n_tiles))) |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
30 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
31 print( |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
32 "Copying base matrix to level {0} and producing {0} zoom levels starting from 0...".format( |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
33 n_zooms), |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
34 file=sys.stdout |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
35 ) |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
36 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
37 # transfer base matrix |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
38 with h5py.File(outfile, 'w') as dest, \ |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
39 h5py.File(infile, 'r') as src: |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
40 zoomLevel = str(n_zooms) |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
41 src.copy('/', dest, zoomLevel) |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
42 print(zoomLevel, file=sys.stdout) |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
43 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
44 # produce aggregations |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
45 with h5py.File(outfile, 'r+') as f: |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
46 grp = f[str(n_zooms)] |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
47 c = cooler.Cooler(grp) |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
48 binsize = cooler.info(grp)['bin-size'] |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
49 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
50 for i in range(n_zooms - 1, -1, -1): |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
51 zoomLevel = str(i) |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
52 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
53 # aggregate |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
54 new_binsize = binsize * FACTOR |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
55 new_bins = cooler.util.binnify(chromsizes, new_binsize) |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
56 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
57 reader = CoolerAggregator(c, new_bins, chunksize) |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
58 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
59 grp = f.create_group(zoomLevel) |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
60 f.attrs[zoomLevel] = new_binsize |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
61 cooler.io.create(grp, chroms, lengths, new_bins, reader) |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
62 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
63 # balance |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
64 # with Pool(N_CPU) as pool: |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
65 too_close = 20000 # for HindIII |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
66 ignore_diags = max(int(np.ceil(too_close / new_binsize)), 3) |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
67 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
68 bias = cooler.ice.iterative_correction( |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
69 f, zoomLevel, |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
70 chunksize=chunksize, |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
71 min_nnz=10, |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
72 mad_max=3, |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
73 ignore_diags=ignore_diags, |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
74 map=map) |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
75 h5opts = dict(compression='gzip', compression_opts=6) |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
76 grp['bins'].create_dataset('weight', data=bias, **h5opts) |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
77 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
78 print(zoomLevel, file=sys.stdout) |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
79 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
80 c = cooler.Cooler(grp) |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
81 binsize = new_binsize |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
82 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
83 if __name__ == '__main__': |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
84 parser = argparse.ArgumentParser( |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
85 description="Recursively aggregate a single resolution cooler file into a multi-resolution file.") |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
86 parser.add_argument( |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
87 "cooler_file", |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
88 help="Cooler file", |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
89 metavar="COOLER_PATH") |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
90 parser.add_argument( |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
91 "--out", "-o", |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
92 help="Output text file") |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
93 args = vars(parser.parse_args()) |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
94 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
95 infile = args['cooler_file'] |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
96 if args['out'] is None: |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
97 outfile = infile.replace('.cool', '.multires.cool') |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
98 else: |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
99 outfile = args['out'] |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
100 |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
101 chunksize = int(1e6) |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
102 with warnings.catch_warnings(): |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
103 warnings.simplefilter("ignore") |
|
a80d960a253b
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
scottx611x
parents:
diff
changeset
|
104 main(infile, outfile, chunksize) |
