diff recursive_agg_onefile.py @ 0:c7d624661e8a draft

planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
author scottx611x
date Sun, 20 Nov 2016 18:30:37 -0500
parents
children b51c0906bc8b
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/recursive_agg_onefile.py	Sun Nov 20 18:30:37 2016 -0500
@@ -0,0 +1,116 @@
+from __future__ import division, print_function
+from multiprocessing import Pool
+import argparse
+import sys
+
+import numpy as np
+from cooler.io import CoolerAggregator
+import cooler.ice
+import cooler
+import h5py
+
+
+FACTOR = 2
+TILESIZE = 256
+N_CPU = 8
+
+
+def set_postmortem_hook():
+    import sys, traceback, ipdb
+    def _excepthook(exc_type, value, tb):
+        traceback.print_exception(exc_type, value, tb)
+        print()
+        ipdb.pm()
+    sys.excepthook = _excepthook
+
+set_postmortem_hook()
+
+
+def main(infile, outfile, chunksize):
+    c = cooler.Cooler(infile)
+    binsize = c.info['bin-size']
+    chromtable = c.chroms()[:]
+    chromsizes = chromtable.set_index('name')['length']
+    chroms = chromtable['name'].values
+    lengths = chromtable['length'].values
+    total_length = np.sum(chromsizes.values)
+    n_tiles = total_length / binsize / TILESIZE
+    n_zooms = int(np.ceil(np.log2(n_tiles)))
+
+    print(
+        "Copying base matrix to level {0} and producing {0} zoom levels starting from 0...".format(n_zooms),
+        file=sys.stderr
+    )
+
+    # transfer base matrix
+    with h5py.File(outfile, 'w') as dest, \
+         h5py.File(infile, 'r') as src:
+
+        zoomLevel = str(n_zooms)
+        src.copy('/', dest, zoomLevel)
+
+        print(zoomLevel, file=sys.stderr)
+
+
+    # produce aggregations
+    with h5py.File(outfile, 'r+') as f:
+        grp = f[str(n_zooms)]
+        c = cooler.Cooler(grp)
+        binsize = cooler.info(grp)['bin-size']
+
+        for i in range(n_zooms - 1, -1, -1):
+            zoomLevel = str(i)
+
+            # aggregate
+            new_binsize = binsize * FACTOR
+            new_bins = cooler.util.binnify(chromsizes, new_binsize)
+     
+            reader = CoolerAggregator(c, new_bins, chunksize)
+            
+            grp = f.create_group(zoomLevel)
+            f.attrs[zoomLevel] = new_binsize
+            cooler.io.create(grp, chroms, lengths, new_bins, reader)
+
+            # balance
+            #with Pool(N_CPU) as pool:
+            too_close = 20000  # for HindIII
+            ignore_diags = max(int(np.ceil(too_close / new_binsize)), 3)
+
+            bias = cooler.ice.iterative_correction(
+                f, zoomLevel,
+                chunksize=chunksize,
+                min_nnz=10,
+                mad_max=3,
+                ignore_diags=ignore_diags,
+                map=map)
+            h5opts = dict(compression='gzip', compression_opts=6)
+            grp['bins'].create_dataset('weight', data=bias, **h5opts)
+
+            print(zoomLevel, file=sys.stderr)
+
+            c = cooler.Cooler(grp)
+            binsize = new_binsize
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description="Recursively aggregate a single resolution cooler file into a multi-resolution file.")
+    parser.add_argument(
+        "cooler_file",
+        help="Cooler file",
+        metavar="COOLER_PATH")
+    parser.add_argument(
+        "--out", "-o",
+        help="Output text file")
+    args = vars(parser.parse_args())
+
+
+    infile = args['cooler_file']
+    if args['out'] is None:
+        outfile = infile.replace('.cool', '.multires.cool')
+    else:
+        outfile = args['out']
+
+    chunksize = int(1e6)
+    main(infile, outfile, chunksize)
+