view data_manager/mash_sketch_builder.py @ 11:7ada2cb42a4b draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_mash_sketch_builder/ commit 05fb8d8428ce9bc84837bb4043018ab2135f5727-dirty"
author dfornika
date Mon, 24 Feb 2020 18:47:10 +0000
parents 523d14e3d36f
children dbac49469bc2
line wrap: on
line source

#!/usr/bin/env python

import argparse
import errno
import json
import os
import subprocess
import sys
import uuid

from pprint import pprint

# Name of the data table that new sketch entries are registered under; used as
# the default ``data_table_name`` of mash_sketch() and as the key inside the
# 'data_tables' mapping it returns.
DATA_TABLE_NAME = "mash_sketches"


def mash_sketch(mash_sketch_args, sketch_name, target_directory, data_table_name=DATA_TABLE_NAME):
    """Run ``mash sketch`` and return the data table entry describing the result.

    A new UUID-named subdirectory is created inside ``target_directory``.
    ``mash sketch`` is then executed with ``target_directory`` as its working
    directory and ``<UUID>/sketch`` as its output prefix.

    Args:
        mash_sketch_args: Mapping with the keys ``"kmer_size"``,
            ``"sketch_size"``, ``"threads"`` and ``"fasta"``; each value is
            converted with ``str()`` before being passed to mash.
        sketch_name: Value stored in the ``"name"`` field of the entry.
        target_directory: Existing directory in which the UUID subdirectory
            is created and from which mash is run.
        data_table_name: Key under ``'data_tables'`` in the returned mapping.

    Returns:
        A dict of the form
        ``{'data_tables': {data_table_name: [{'value', 'name', 'path'}]}}``
        where ``value`` is the UUID and ``path`` is the output prefix relative
        to ``target_directory``.

    Raises:
        OSError: If the subdirectory cannot be created or ``mash`` cannot be
            executed.
        subprocess.CalledProcessError: If ``mash sketch`` exits non-zero.
    """
    sketch_id = str(uuid.uuid4())

    os.mkdir(os.path.join(target_directory, sketch_id))

    # Relative to target_directory, which is also the cwd mash runs in.
    # NOTE(review): mash is documented to append ".msh" to the -o prefix;
    # confirm that consumers of the "path" column expect the prefix rather
    # than the full file name.
    sketch_path = os.path.join(sketch_id, "sketch")

    # An argv list (not a joined string) is required because check_call is
    # invoked without a shell; it also keeps paths with spaces intact.
    cmd = [
        'mash', 'sketch',
        '-k', str(mash_sketch_args["kmer_size"]),
        '-s', str(mash_sketch_args["sketch_size"]),
        '-o', sketch_path,
        '-p', str(mash_sketch_args["threads"]),
        str(mash_sketch_args["fasta"]),
    ]

    # Echo the command line so it shows up in the job's standard output.
    pprint(' '.join(cmd))

    subprocess.check_call(cmd, cwd=target_directory)

    data_table_entry = {
        'data_tables': {
            data_table_name: [
                {
                    "value": sketch_id,
                    "name": sketch_name,
                    "path": sketch_path,
                }
            ]
        }
    }

    return data_table_entry


def main():
    """Command-line entry point: build a mash sketch and report it as JSON.

    Reads the JSON file named by the positional ``data_manager_json``
    argument, takes the target directory from
    ``output_data[0].extra_files_path``, creates that directory if it does not
    exist, runs ``mash_sketch`` inside it, and finally overwrites the same
    JSON file with the resulting data table entry.

    Raises:
        OSError: If the target directory cannot be created (other than because
            it already exists as a directory) or a file cannot be opened.
        KeyError, IndexError: If the input JSON lacks the expected
            ``output_data[0]['extra_files_path']`` entry.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('data_manager_json')
    # NOTE(review): the defaults below are kept unchanged for compatibility,
    # but mash documents a maximum k-mer size of 32 — confirm that callers
    # always pass --kmer-size / --sketch-size explicitly.
    parser.add_argument('--kmer-size', dest='kmer_size', type=int, default=35, help='kmer length')
    parser.add_argument('--sketch-size', dest='sketch_size', type=int, default=31, help='sketch size')
    parser.add_argument('--fasta', dest='fasta', help='Fasta file to sketch')
    parser.add_argument('--threads', dest='threads', type=int, default=1, help='threads')
    parser.add_argument('--sketch-name', dest='sketch_name', help='Name for sketch')
    args = parser.parse_args()

    with open(args.data_manager_json) as json_in:
        data_manager_input = json.load(json_in)

    target_directory = data_manager_input['output_data'][0]['extra_files_path']

    pprint("***" + target_directory + "***")

    try:
        os.mkdir(target_directory)
    except OSError as exc:
        # An already-existing directory is fine; anything else is an error.
        if not (exc.errno == errno.EEXIST and os.path.isdir(target_directory)):
            raise

    mash_sketch_args = {
        "kmer_size": args.kmer_size,
        "sketch_size": args.sketch_size,
        "fasta": args.fasta,
        "threads": args.threads,
    }

    data_manager_output = mash_sketch(
        mash_sketch_args,
        args.sketch_name,
        target_directory,
    )

    # Serialise before opening for writing so that a serialisation failure
    # cannot leave the input file truncated.
    serialised_output = json.dumps(data_manager_output, sort_keys=True)
    with open(args.data_manager_json, 'w') as json_out:
        json_out.write(serialised_output)


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()