Mercurial > repos > ric > test1
comparison galaxy-tools/biobank/utils/check_tsv_header.py @ 3:43be74e62bfe draft
Uploaded
author | ric |
---|---|
date | Thu, 22 Sep 2016 08:57:04 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:47bf0086e082 | 3:43be74e62bfe |
---|---|
1 """ | |
2 """ | |
3 import csv, sys, argparse, logging | |
4 | |
5 | |
# Layout of each log record: timestamp|level name padded to 8 chars|message.
LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
# Date pattern handed to logging as ``datefmt`` for the %(asctime)s field.
LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
# Level names accepted by the --loglevel command-line option.
LOG_LEVELS = [
    'DEBUG',
    'INFO',
    'WARNING',
    'ERROR',
    'CRITICAL',
]
9 | |
def make_parser():
    """Build the command-line parser for the TSV header check.

    Options defined:
        --logfile: path of the log file (help text states stderr is used
            when it is omitted).
        --loglevel: one of the names in LOG_LEVELS, default 'INFO'.
        --in-file / -i: path of the input file (required).

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser(
        description='check the header of a tab-separated input file')
    parser.add_argument('--logfile', type=str,
                        help='log file (default=stderr)')
    parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
                        help='logging level', default='INFO')
    parser.add_argument('--in-file', '-i', type=str, required=True,
                        help='input file')
    return parser
18 | |
def read_samplesheet(in_file):
    """Load a tab-separated file whose first row is the header.

    Args:
        in_file: path of the TSV file to read.

    Returns:
        tuple: ``(recs, fieldnames)`` where ``recs`` is the list of row
        mappings produced by ``csv.DictReader`` and ``fieldnames`` is the
        list of column names taken from the first row, or ``None`` when
        the file is empty.
    """
    # The 'U' open mode was removed in Python 3.11 (ValueError). The csv
    # module asks for newline='' so it can handle line endings itself.
    with open(in_file, newline='') as f:
        reader = csv.DictReader(f, delimiter='\t')
        recs = list(reader)
        fieldnames = reader.fieldnames
    return recs, fieldnames
25 | |
def is_header_different(titer_plate_header, header_to_be_checked):
    """Tell whether a header deviates from the expected column names.

    The two headers are compared as sets, so column order and repeated
    names are ignored. Both unexpected extra columns and missing expected
    columns count as a difference.

    Args:
        titer_plate_header: iterable of the expected column names.
        header_to_be_checked: iterable of the column names found in the
            file, or ``None`` when the file had no header row.

    Returns:
        bool: ``True`` when the headers differ, ``False`` when they hold
        exactly the same column names.
    """
    # A file without any header row cannot match the expected columns.
    if header_to_be_checked is None:
        return True
    return set(header_to_be_checked) != set(titer_plate_header)
32 | |
def main(argv):
    """Check the header of the TSV file named on the command line.

    Parses ``argv``, configures logging, reads the input file and compares
    its header with the expected titer plate columns.

    Args:
        argv: list of command-line arguments, without the program name.

    Raises:
        SystemExit: with status 2 when the header is missing or not
            compliant (argparse also exits on invalid arguments).
    """
    parser = make_parser()
    args = parser.parse_args(argv)

    log_config = {
        'format': LOG_FORMAT,
        'datefmt': LOG_DATEFMT,
        'level': getattr(logging, args.loglevel),
    }
    # Without a filename, basicConfig logs to stderr.
    if args.logfile:
        log_config['filename'] = args.logfile
    logging.basicConfig(**log_config)
    logger = logging.getLogger('check tsv header')

    logger.info('Loading data from file %s', args.in_file)
    recs, file_header = read_samplesheet(args.in_file)
    logger.debug('Retrieved %d records', len(recs))

    titer_plate_header = ['study', 'label', 'barcode', 'rows', 'columns']

    # An empty input file yields no header (None); report it as not
    # compliant instead of letting the comparison fail with a TypeError.
    if file_header is None or is_header_different(titer_plate_header,
                                                  file_header):
        logger.info("Tsv file header has to have this elements: {}".format(titer_plate_header))
        # hack to support old galaxy
        # NOTE(review): presumably old Galaxy releases detect failures from
        # stderr output -- confirm before changing.
        sys.stderr.write("Headers not compliant to: {}".format(titer_plate_header))
        sys.exit(2)

    logger.info("Header check passed")
59 | |
# Run the check only when executed as a script, forwarding the
# command-line arguments without the program name.
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    main(cli_args)