view data_manager/mash_build_sketch.py @ 1:7aea7633ec0b draft

planemo upload for repository https://github.com/dfornika/galaxytools/tree/master/data_managers/data_manager_build_mash_sketch/ commit 423cf96266f6ac433052ff72edb1397502313010-dirty
author dfornika
date Thu, 25 Jul 2019 13:32:46 -0400
parents 6e90d7d564ee
children ea2cee927c04
line wrap: on
line source

#!/usr/bin/env python

from __future__ import print_function

import argparse
import datetime
import errno
import json
import os
import string
import subprocess
import sys
import uuid


# Default name of the data table that newly built sketch entries are filed
# under; used as the default for mash_build_sketch()'s data_table_name.
DATA_TABLE_NAME = "mash_sketches"


def mash_build_sketch(target_directory, mash_args, database_name, data_table_name=DATA_TABLE_NAME):
    """Sketch every input sequence with Mash and combine them into one sketch.

    A fresh, UUID-named directory is created below ``target_directory``.
    Each input is sketched into a temporary ``tmp`` subdirectory, the
    per-input sketches are merged with ``mash paste`` into ``sketch.msh``,
    the temporary directory is removed, and ``bagit.py`` is run on the
    UUID-named directory.

    Args:
        target_directory: Existing directory in which the database
            directory is created.
        mash_args: Mapping with the key ``input_seqs`` (paths of the
            sequence files to sketch) and the optional keys ``threads``,
            ``kmer_size`` and ``sketch_size``. Options that are missing or
            ``None`` are not passed to ``mash sketch``.
        database_name: Display name stored in the data table entry.
        data_table_name: Name of the data table the entry is filed under.

    Returns:
        A dict of the form ``{"data_tables": {data_table_name: [entry]}}``
        where ``entry`` holds the ``value``, ``name`` and ``path`` of the
        new database.

    Raises:
        ValueError: If ``mash_args`` lists no input sequences.
        subprocess.CalledProcessError: If ``mash``, ``rm`` or ``bagit.py``
            exits with a non-zero status.
        OSError: If a directory cannot be created or a program cannot be
            started.
    """
    # Resolve inputs up front: the mash calls below run with a different
    # working directory, which would break relative paths.
    input_seqs = [os.path.abspath(path) for path in mash_args.get('input_seqs') or []]
    if not input_seqs:
        raise ValueError("mash_args['input_seqs'] must list at least one sequence file")

    database_value = str(uuid.uuid4())
    database_path = database_value
    database_dir = os.path.join(target_directory, database_path)
    tmp_dir = os.path.join(database_dir, 'tmp')
    # The directory name is a fresh UUID, so it is not expected to exist yet.
    os.makedirs(tmp_dir)

    # subprocess only accepts strings, and unset options must be omitted
    # rather than passed to mash as the literal text "None".
    sketch_options = []
    for flag, key in (('-p', 'threads'), ('-k', 'kmer_size'), ('-s', 'sketch_size')):
        value = mash_args.get(key)
        if value is not None:
            sketch_options += [flag, str(value)]

    for index, input_seq in enumerate(input_seqs):
        # Without -o, mash writes "<input>.msh" next to the input file instead
        # of into tmp/. The index keeps inputs that share a basename apart.
        output_prefix = '{0}_{1}'.format(index, os.path.basename(input_seq))
        subprocess.check_call(
            ['mash', 'sketch'] + sketch_options + ['-o', output_prefix, input_seq],
            cwd=tmp_dir,
        )

    # No shell is involved, so a "tmp/*.msh" pattern would reach mash
    # unexpanded; list the per-input sketches explicitly instead.
    sketch_files = sorted(
        os.path.join('tmp', name)
        for name in os.listdir(tmp_dir)
        if name.endswith('.msh')
    )

    # "sketch" is the output prefix: mash paste writes sketch.msh.
    subprocess.check_call(['mash', 'paste', 'sketch'] + sketch_files, cwd=database_dir)

    subprocess.check_call(['rm', '-r', 'tmp'], cwd=database_dir)

    # The "path" reported below depends on bagit.py having succeeded, so a
    # failure must not be silently ignored.
    subprocess.check_call(['bagit.py', database_path], cwd=target_directory)

    data_table_entry = {
        "data_tables": {
            data_table_name: [
                {
                    "value": database_value,
                    "name": database_name,
                    # NOTE(review): the 'data' suffix assumes bagit.py moves
                    # the payload into a data/ subdirectory -- confirm.
                    "path": os.path.join(database_path, 'data'),
                }
            ]
        }
    }

    return data_table_entry


def main():
    """Parse the command line, build the Mash sketch and write the result.

    Reads the JSON file named by the ``data_manager_json`` argument, takes
    the target directory from ``output_data[0].extra_files_path``, creates
    that directory if needed, builds the sketch database and overwrites the
    same JSON file with the resulting data table entry.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('data_manager_json')
    parser.add_argument('--threads', dest='threads', default=1, help='threads')
    parser.add_argument('--kmer-size', dest='kmer_size', help='K-mer size')
    parser.add_argument('--sketch-size', dest='sketch_size', help='Sketch size')
    # NOTE(review): the two options below are read further down but were
    # never declared on the parser; the tool wrapper must pass them --
    # confirm the option names against it.
    parser.add_argument('--database-name', dest='database_name', help='Database name')
    # Repeatable option rather than nargs='*', so it cannot swallow the
    # positional data_manager_json argument.
    parser.add_argument('--input-seq', dest='input_seqs', action='append',
                        help='Sequence file to sketch (may be given multiple times)')
    args = parser.parse_args()

    with open(args.data_manager_json) as json_file:
        data_manager_input = json.load(json_file)

    target_directory = data_manager_input['output_data'][0]['extra_files_path']

    mash_args = {
        'input_seqs': args.input_seqs or [],
        'kmer_size': args.kmer_size,
        'sketch_size': args.sketch_size,
        'threads': args.threads,
    }

    try:
        os.mkdir(target_directory)
    except OSError as exc:
        # An already existing directory is fine; anything else is an error.
        if not (exc.errno == errno.EEXIST and os.path.isdir(target_directory)):
            raise

    data_manager_output = mash_build_sketch(
        target_directory,
        mash_args,
        args.database_name,
    )

    # Text mode: json.dumps returns str, which cannot be written to a file
    # opened in binary mode on Python 3.
    with open(args.data_manager_json, 'w') as json_file:
        json_file.write(json.dumps(data_manager_output))


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()