view recursive_agg_onefile.py @ 0:a80d960a253b draft
planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
| author | scottx611x |
|---|---|
| date | Tue, 22 Nov 2016 16:39:51 -0500 |
| parents | |
| children | |
from __future__ import division, print_function
import sys
from multiprocessing import Pool
import argparse
import warnings

import numpy as np
import cooler
from cooler.io import CoolerAggregator
import cooler.ice
import h5py


FACTOR = 2
TILESIZE = 256
N_CPU = 8


def main(infile, outfile, chunksize):
    c = cooler.Cooler(infile)
    binsize = c.info['bin-size']
    chromtable = c.chroms()[:]
    chromsizes = chromtable.set_index('name')['length']
    chroms = chromtable['name'].values
    lengths = chromtable['length'].values
    total_length = np.sum(chromsizes.values)
    n_tiles = total_length / binsize / TILESIZE
    n_zooms = int(np.ceil(np.log2(n_tiles)))

    print(
        "Copying base matrix to level {0} and producing {0} zoom levels "
        "starting from 0...".format(n_zooms),
        file=sys.stdout
    )

    # transfer base matrix
    with h5py.File(outfile, 'w') as dest, \
            h5py.File(infile, 'r') as src:
        zoomLevel = str(n_zooms)
        src.copy('/', dest, zoomLevel)
        print(zoomLevel, file=sys.stdout)

    # produce aggregations
    with h5py.File(outfile, 'r+') as f:
        grp = f[str(n_zooms)]
        c = cooler.Cooler(grp)
        binsize = cooler.info(grp)['bin-size']

        for i in range(n_zooms - 1, -1, -1):
            zoomLevel = str(i)

            # aggregate
            new_binsize = binsize * FACTOR
            new_bins = cooler.util.binnify(chromsizes, new_binsize)
            reader = CoolerAggregator(c, new_bins, chunksize)
            grp = f.create_group(zoomLevel)
            f.attrs[zoomLevel] = new_binsize
            cooler.io.create(grp, chroms, lengths, new_bins, reader)

            # balance
            # with Pool(N_CPU) as pool:
            too_close = 20000  # for HindIII
            ignore_diags = max(int(np.ceil(too_close / new_binsize)), 3)
            bias = cooler.ice.iterative_correction(
                f, zoomLevel,
                chunksize=chunksize,
                min_nnz=10,
                mad_max=3,
                ignore_diags=ignore_diags,
                map=map)
            h5opts = dict(compression='gzip', compression_opts=6)
            grp['bins'].create_dataset('weight', data=bias, **h5opts)

            print(zoomLevel, file=sys.stdout)

            c = cooler.Cooler(grp)
            binsize = new_binsize


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Recursively aggregate a single resolution cooler "
                    "file into a multi-resolution file.")
    parser.add_argument(
        "cooler_file",
        help="Cooler file",
        metavar="COOLER_PATH")
    parser.add_argument(
        "--out", "-o",
        help="Output text file")
    args = vars(parser.parse_args())

    infile = args['cooler_file']
    if args['out'] is None:
        outfile = infile.replace('.cool', '.multires.cool')
    else:
        outfile = args['out']

    chunksize = int(1e6)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        main(infile, outfile, chunksize)
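
For orientation, here is a minimal usage sketch. It assumes the script is saved as `recursive_agg_onefile.py` and that a single-resolution file named `matrix.cool` exists in the working directory; the input and output file names are hypothetical placeholders, not files shipped with this repository. As the code above shows, the output file holds one HDF5 group per zoom level: the highest-numbered group (`str(n_zooms)`) is the copied base matrix, groups `n_zooms - 1` down to `0` hold the successive 2x aggregations, and each aggregated level's bin size is recorded as a root-level attribute.

```python
# Minimal usage sketch; "matrix.cool" and the derived output name are
# hypothetical placeholders.
import subprocess

import h5py

# Run the script. Without --out, it writes "matrix.multires.cool"
# next to the input.
subprocess.check_call(["python", "recursive_agg_onefile.py", "matrix.cool"])

# Inspect the result: one group per zoom level. Aggregated levels have
# their bin size stored in the file's root attributes; the base level
# (the highest-numbered group) does not, since it was copied verbatim.
with h5py.File("matrix.multires.cool", "r") as f:
    for level in sorted(f, key=int):
        binsize = f.attrs.get(level, "base resolution")
        print("zoom level", level, "->", binsize)
```

The per-level balancing weights computed by `cooler.ice.iterative_correction` end up as a `bins/weight` dataset inside each aggregated group, mirroring the `grp['bins'].create_dataset('weight', ...)` call in the loop above.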
