multires_cooler_convert: recursive_agg

comparison recursive_agg_onefile.py @ 0:a80d960a253b draft

planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty

author	scottx611x
date	Tue, 22 Nov 2016 16:39:51 -0500
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:a80d960a253b
+from __future__ import division, print_function
+import sys
+from multiprocessing import Pool
+import argparse
+import warnings
+import numpy as np
+import cooler
+from cooler.io import CoolerAggregator
+import cooler.ice
+import h5py
+FACTOR = 2
+TILESIZE = 256
+N_CPU = 8
+def main(infile, outfile, chunksize):
+c = cooler.Cooler(infile)
+binsize = c.info['bin-size']
+chromtable = c.chroms()[:]
+chromsizes = chromtable.set_index('name')['length']
+chroms = chromtable['name'].values
+lengths = chromtable['length'].values
+total_length = np.sum(chromsizes.values)
+n_tiles = total_length / binsize / TILESIZE
+n_zooms = int(np.ceil(np.log2(n_tiles)))
+print(
+"Copying base matrix to level {0} and producing {0} zoom levels starting from 0...".format(
+n_zooms),
+file=sys.stdout
+)
+# transfer base matrix
+with h5py.File(outfile, 'w') as dest, \
+h5py.File(infile, 'r') as src:
+zoomLevel = str(n_zooms)
+src.copy('/', dest, zoomLevel)
+print(zoomLevel, file=sys.stdout)
+# produce aggregations
+with h5py.File(outfile, 'r+') as f:
+grp = f[str(n_zooms)]
+c = cooler.Cooler(grp)
+binsize = cooler.info(grp)['bin-size']
+for i in range(n_zooms - 1, -1, -1):
+zoomLevel = str(i)
+# aggregate
+new_binsize = binsize * FACTOR
+new_bins = cooler.util.binnify(chromsizes, new_binsize)
+reader = CoolerAggregator(c, new_bins, chunksize)
+grp = f.create_group(zoomLevel)
+f.attrs[zoomLevel] = new_binsize
+cooler.io.create(grp, chroms, lengths, new_bins, reader)
+# balance
+# with Pool(N_CPU) as pool:
+too_close = 20000  # for HindIII
+ignore_diags = max(int(np.ceil(too_close / new_binsize)), 3)
+bias = cooler.ice.iterative_correction(
+f, zoomLevel,
+chunksize=chunksize,
+min_nnz=10,
+mad_max=3,
+ignore_diags=ignore_diags,
+map=map)
+h5opts = dict(compression='gzip', compression_opts=6)
+grp['bins'].create_dataset('weight', data=bias, **h5opts)
+print(zoomLevel, file=sys.stdout)
+c = cooler.Cooler(grp)
+binsize = new_binsize
+if __name__ == '__main__':
+parser = argparse.ArgumentParser(
+description="Recursively aggregate a single resolution cooler file into a multi-resolution file.")
+parser.add_argument(
+"cooler_file",
+help="Cooler file",
+metavar="COOLER_PATH")
+parser.add_argument(
+"--out", "-o",
+help="Output text file")
+args = vars(parser.parse_args())
+infile = args['cooler_file']
+if args['out'] is None:
+outfile = infile.replace('.cool', '.multires.cool')
+else:
+outfile = args['out']
+chunksize = int(1e6)
+with warnings.catch_warnings():
+warnings.simplefilter("ignore")
+main(infile, outfile, chunksize)

Mercurial > repos > scottx611x > multires_cooler_convert

comparison recursive_agg_onefile.py @ 0:a80d960a253b draft