view recursive_agg_onefile.py @ 0:a80d960a253b draft

planemo upload commit 9e980862c8b763cf3dd0209be49f0d2d75a90614-dirty
author scottx611x
date Tue, 22 Nov 2016 16:39:51 -0500
parents
children
line wrap: on
line source

from __future__ import division, print_function
import sys

from multiprocessing import Pool
import argparse
import warnings

import numpy as np
import cooler
from cooler.io import CoolerAggregator
import cooler.ice
import h5py


FACTOR = 2
TILESIZE = 256
N_CPU = 8


def main(infile, outfile, chunksize):
    c = cooler.Cooler(infile)
    binsize = c.info['bin-size']
    chromtable = c.chroms()[:]
    chromsizes = chromtable.set_index('name')['length']
    chroms = chromtable['name'].values
    lengths = chromtable['length'].values
    total_length = np.sum(chromsizes.values)
    n_tiles = total_length / binsize / TILESIZE
    n_zooms = int(np.ceil(np.log2(n_tiles)))

    print(
        "Copying base matrix to level {0} and producing {0} zoom levels starting from 0...".format(
            n_zooms),
        file=sys.stdout
    )

    # transfer base matrix
    with h5py.File(outfile, 'w') as dest, \
            h5py.File(infile, 'r') as src:
        zoomLevel = str(n_zooms)
        src.copy('/', dest, zoomLevel)
        print(zoomLevel, file=sys.stdout)

    # produce aggregations
    with h5py.File(outfile, 'r+') as f:
        grp = f[str(n_zooms)]
        c = cooler.Cooler(grp)
        binsize = cooler.info(grp)['bin-size']

        for i in range(n_zooms - 1, -1, -1):
            zoomLevel = str(i)

            # aggregate
            new_binsize = binsize * FACTOR
            new_bins = cooler.util.binnify(chromsizes, new_binsize)

            reader = CoolerAggregator(c, new_bins, chunksize)

            grp = f.create_group(zoomLevel)
            f.attrs[zoomLevel] = new_binsize
            cooler.io.create(grp, chroms, lengths, new_bins, reader)

            # balance
            # with Pool(N_CPU) as pool:
            too_close = 20000  # for HindIII
            ignore_diags = max(int(np.ceil(too_close / new_binsize)), 3)

            bias = cooler.ice.iterative_correction(
                f, zoomLevel,
                chunksize=chunksize,
                min_nnz=10,
                mad_max=3,
                ignore_diags=ignore_diags,
                map=map)
            h5opts = dict(compression='gzip', compression_opts=6)
            grp['bins'].create_dataset('weight', data=bias, **h5opts)

            print(zoomLevel, file=sys.stdout)

            c = cooler.Cooler(grp)
            binsize = new_binsize

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Recursively aggregate a single resolution cooler file into a multi-resolution file.")
    parser.add_argument(
        "cooler_file",
        help="Cooler file",
        metavar="COOLER_PATH")
    parser.add_argument(
        "--out", "-o",
        help="Output text file")
    args = vars(parser.parse_args())

    infile = args['cooler_file']
    if args['out'] is None:
        outfile = infile.replace('.cool', '.multires.cool')
    else:
        outfile = args['out']

    chunksize = int(1e6)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        main(infile, outfile, chunksize)