view galaxy-tools/biobank/utils/prepare_aligned_seq_dsample_import.py @ 0:ba6cf6ede027 draft default tip

Uploaded
author ric
date Wed, 28 Sep 2016 06:03:30 -0400
parents
children
line wrap: on
line source

import csv, sys, argparse, logging

# Settings handed to logging.basicConfig() in main().
# Record layout: "<timestamp>|<level name, left-justified to 8 chars>|<message>".
LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
# Timestamp layout used when rendering %(asctime)s.
LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
# Values accepted by the --loglevel command line option.
LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']


def make_parser():
    """Build the command line parser for this script.

    The metadata options (--study, --label, --source, --device, --path,
    --sample, --genome_reference) are required strings. The logging options
    and the two output file paths are optional and fall back to defaults.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser(description='build aligned seq data sample import files')
    parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
    parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
                        help='logging level', default='INFO')

    # All required options have the same shape; their help text is simply the
    # option name with underscores turned into spaces.
    required_options = ('study', 'label', 'source', 'device', 'path',
                        'sample', 'genome_reference')
    for name in required_options:
        parser.add_argument('--' + name, type=str, required=True,
                            help=name.replace('_', ' '))

    parser.add_argument('--dsample_ofile', type=str, default='./genome_variations_dsample.tsv',
                        help='output file containing data samples definitions')
    # This help text used to be a copy of the --dsample_ofile one; the file
    # holds the data object definition, so say so.
    parser.add_argument('--dobject_ofile', type=str, default='./genome_variations_dobject.tsv',
                        help='output file containing data objects definitions')

    return parser

def _write_single_row_tsv(path, header, row):
    """Write a tab-separated file at `path` holding the header line and one row."""
    with open(path, 'w') as ofile:
        writer = csv.DictWriter(ofile, header, delimiter='\t')
        writer.writeheader()
        writer.writerow(row)


def main(argv):
    """Write the data sample and data object import files.

    Parses `argv`, configures logging, then writes two single-row
    tab-separated files: one at --dsample_ofile describing the data sample
    and one at --dobject_ofile describing the data object stored at --path.

    Args:
        argv: list of command line arguments, without the program name.
    """
    parser = make_parser()
    args = parser.parse_args(argv)

    log_level = getattr(logging, args.loglevel)
    kwargs = {'format': LOG_FORMAT,
              'datefmt': LOG_DATEFMT,
              'level': log_level}
    # Without --logfile, basicConfig falls back to its default stream handler.
    if args.logfile:
        kwargs['filename'] = args.logfile
    logging.basicConfig(**kwargs)
    logger = logging.getLogger('prepare_aligned_seq_dsample_import')

    # source_type, seq_dsample_type and status are fixed values for this tool.
    _write_single_row_tsv(
        args.dsample_ofile,
        ['study', 'label', 'source', 'source_type', 'seq_dsample_type',
         'status', 'device', 'sample', 'genome_reference'],
        {'study': args.study,
         'label': args.label,
         'source': args.source,
         'device': args.device,
         'source_type': 'Tube',
         'seq_dsample_type': 'AlignedSeqDataSample',
         'status': 'USABLE',
         'sample': args.sample,
         'genome_reference': args.genome_reference})
    logger.info('Done writing file %s', args.dsample_ofile)

    # The data object row points back at the data sample through its label;
    # size and sha1 are written as the fixed values '-1' and 'N.A.'.
    _write_single_row_tsv(
        args.dobject_ofile,
        ['study', 'path', 'data_sample', 'mimetype', 'size', 'sha1'],
        {'study': args.study,
         'path': args.path,
         'data_sample': args.label,
         'mimetype': 'x-vl/bam',
         'size': '-1',
         'sha1': 'N.A.'})
    logger.info('Done writing file %s', args.dobject_ofile)

# When run as a script, hand the command line arguments (program name dropped)
# to main().
if __name__ == '__main__':
    main(sys.argv[1:])