Mercurial > repos > scottx611x > multires_cooler_convert
comparison recursive_agg_onefile.py @ 0:a80d960a253b draft
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
| author | scottx611x |
|---|---|
| date | Tue, 22 Nov 2016 16:39:51 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a80d960a253b |
|---|---|
| 1 from __future__ import division, print_function | |
| 2 import sys | |
| 3 | |
| 4 from multiprocessing import Pool | |
| 5 import argparse | |
| 6 import warnings | |
| 7 | |
| 8 import numpy as np | |
| 9 import cooler | |
| 10 from cooler.io import CoolerAggregator | |
| 11 import cooler.ice | |
| 12 import h5py | |
| 13 | |
| 14 | |
| 15 FACTOR = 2 | |
| 16 TILESIZE = 256 | |
| 17 N_CPU = 8 | |
| 18 | |
| 19 | |
| 20 def main(infile, outfile, chunksize): | |
| 21 c = cooler.Cooler(infile) | |
| 22 binsize = c.info['bin-size'] | |
| 23 chromtable = c.chroms()[:] | |
| 24 chromsizes = chromtable.set_index('name')['length'] | |
| 25 chroms = chromtable['name'].values | |
| 26 lengths = chromtable['length'].values | |
| 27 total_length = np.sum(chromsizes.values) | |
| 28 n_tiles = total_length / binsize / TILESIZE | |
| 29 n_zooms = int(np.ceil(np.log2(n_tiles))) | |
| 30 | |
| 31 print( | |
| 32 "Copying base matrix to level {0} and producing {0} zoom levels starting from 0...".format( | |
| 33 n_zooms), | |
| 34 file=sys.stdout | |
| 35 ) | |
| 36 | |
| 37 # transfer base matrix | |
| 38 with h5py.File(outfile, 'w') as dest, \ | |
| 39 h5py.File(infile, 'r') as src: | |
| 40 zoomLevel = str(n_zooms) | |
| 41 src.copy('/', dest, zoomLevel) | |
| 42 print(zoomLevel, file=sys.stdout) | |
| 43 | |
| 44 # produce aggregations | |
| 45 with h5py.File(outfile, 'r+') as f: | |
| 46 grp = f[str(n_zooms)] | |
| 47 c = cooler.Cooler(grp) | |
| 48 binsize = cooler.info(grp)['bin-size'] | |
| 49 | |
| 50 for i in range(n_zooms - 1, -1, -1): | |
| 51 zoomLevel = str(i) | |
| 52 | |
| 53 # aggregate | |
| 54 new_binsize = binsize * FACTOR | |
| 55 new_bins = cooler.util.binnify(chromsizes, new_binsize) | |
| 56 | |
| 57 reader = CoolerAggregator(c, new_bins, chunksize) | |
| 58 | |
| 59 grp = f.create_group(zoomLevel) | |
| 60 f.attrs[zoomLevel] = new_binsize | |
| 61 cooler.io.create(grp, chroms, lengths, new_bins, reader) | |
| 62 | |
| 63 # balance | |
| 64 # with Pool(N_CPU) as pool: | |
| 65 too_close = 20000 # for HindIII | |
| 66 ignore_diags = max(int(np.ceil(too_close / new_binsize)), 3) | |
| 67 | |
| 68 bias = cooler.ice.iterative_correction( | |
| 69 f, zoomLevel, | |
| 70 chunksize=chunksize, | |
| 71 min_nnz=10, | |
| 72 mad_max=3, | |
| 73 ignore_diags=ignore_diags, | |
| 74 map=map) | |
| 75 h5opts = dict(compression='gzip', compression_opts=6) | |
| 76 grp['bins'].create_dataset('weight', data=bias, **h5opts) | |
| 77 | |
| 78 print(zoomLevel, file=sys.stdout) | |
| 79 | |
| 80 c = cooler.Cooler(grp) | |
| 81 binsize = new_binsize | |
| 82 | |
| 83 if __name__ == '__main__': | |
| 84 parser = argparse.ArgumentParser( | |
| 85 description="Recursively aggregate a single resolution cooler file into a multi-resolution file.") | |
| 86 parser.add_argument( | |
| 87 "cooler_file", | |
| 88 help="Cooler file", | |
| 89 metavar="COOLER_PATH") | |
| 90 parser.add_argument( | |
| 91 "--out", "-o", | |
| 92 help="Output text file") | |
| 93 args = vars(parser.parse_args()) | |
| 94 | |
| 95 infile = args['cooler_file'] | |
| 96 if args['out'] is None: | |
| 97 outfile = infile.replace('.cool', '.multires.cool') | |
| 98 else: | |
| 99 outfile = args['out'] | |
| 100 | |
| 101 chunksize = int(1e6) | |
| 102 with warnings.catch_warnings(): | |
| 103 warnings.simplefilter("ignore") | |
| 104 main(infile, outfile, chunksize) |
