Mercurial > repos > ric > test1
view galaxy-tools/biobank/utils/check_tsv_header.py @ 3:43be74e62bfe draft
Uploaded
author | ric |
---|---|
date | Thu, 22 Sep 2016 08:57:04 -0400 |
parents | |
children |
line wrap: on
line source
""" """ import csv, sys, argparse, logging LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s' LOG_DATEFMT = '%Y-%m-%d %H:%M:%S' LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] def make_parser(): parser = argparse.ArgumentParser(description='prepare inputs for VLUTIL.prepare_seq_dsample_inputs') parser.add_argument('--logfile', type=str, help='log file (default=stderr)') parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS, help='logging level', default='INFO') parser.add_argument('--in-file', '-i', type=str, required=True, help='input file') return parser def read_samplesheet(in_file): with open(in_file, 'rU') as f: reader = csv.DictReader(f, delimiter='\t') recs = [r for r in reader] fieldnames = reader.fieldnames return recs, fieldnames def is_header_different(titer_plate_header, header_to_be_checked): result = set(header_to_be_checked).difference(frozenset(titer_plate_header)) if len(result) > 0: return True else: return False def main(argv): parser = make_parser() args = parser.parse_args(argv) log_level = getattr(logging, args.loglevel) kwargs = {'format': LOG_FORMAT, 'datefmt': LOG_DATEFMT, 'level': log_level} if args.logfile: kwargs['filename'] = args.logfile logging.basicConfig(**kwargs) logger = logging.getLogger('check tsv header') logger.info('Loading data from file %s' % args.in_file) recs, file_header = read_samplesheet(args.in_file) logger.debug('Retrieved %d records', len(recs)) titer_plate_header = ['study', 'label', 'barcode', 'rows', 'columns'] if is_header_different(titer_plate_header, file_header): logger.info("Tsv file header has to have this elements: {}".format(titer_plate_header)) # hack to support old galaxy sys.stderr.write("Headers not compliant to: {}".format(titer_plate_header)) sys.exit(2) else: logger.info("Header check passed") if __name__ == '__main__': main(sys.argv[1:])