annotate galaxy-tools/biobank/utils/check_tsv_header.py @ 3:43be74e62bfe draft

Uploaded
author ric
date Thu, 22 Sep 2016 08:57:04 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
43be74e62bfe Uploaded
ric
parents:
diff changeset
"""
Check the header row of a tab-separated file.

The script reads the file given with ``--in-file``/``-i``, takes its first
row as the header and compares the column names against the expected ones
(``study``, ``label``, ``barcode``, ``rows``, ``columns``).  When the header
is not compliant a message is written to stderr and the process exits with
status 2.  Logging goes to the file given with ``--logfile`` when that
option is supplied.
"""
43be74e62bfe Uploaded
ric
parents:
diff changeset
3 import csv, sys, argparse, logging
43be74e62bfe Uploaded
ric
parents:
diff changeset
4
43be74e62bfe Uploaded
ric
parents:
diff changeset
5
43be74e62bfe Uploaded
ric
parents:
diff changeset
# Layout of one log record: timestamp, level name padded to 8 characters and
# the message, separated by '|'.
LOG_FORMAT = '|'.join(('%(asctime)s', '%(levelname)-8s', '%(message)s'))
# Timestamp pattern: date and time separated by a single space.
LOG_DATEFMT = ' '.join(('%Y-%m-%d', '%H:%M:%S'))
# Level names offered as choices for the --loglevel option.
LOG_LEVELS = 'DEBUG INFO WARNING ERROR CRITICAL'.split()
43be74e62bfe Uploaded
ric
parents:
diff changeset
9
43be74e62bfe Uploaded
ric
parents:
diff changeset
def make_parser():
    """Build the command-line parser for the TSV header check.

    Options defined:
      --logfile       path of the log file (help text: default is stderr).
      --loglevel      one of LOG_LEVELS, defaults to 'INFO'.
      --in-file, -i   path of the input file (required).

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser(
        description='check the header of a tab-separated input file')
    parser.add_argument('--logfile', type=str,
                        help='log file (default=stderr)')
    parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
                        help='logging level', default='INFO')
    parser.add_argument('--in-file', '-i', type=str, required=True,
                        help='input file')
    return parser
43be74e62bfe Uploaded
ric
parents:
diff changeset
18
43be74e62bfe Uploaded
ric
parents:
diff changeset
def read_samplesheet(in_file):
    """Read a tab-separated file whose first row is the header.

    Args:
        in_file: path of the file to read.

    Returns:
        tuple: ``(recs, fieldnames)`` where ``recs`` is the list of row
        mappings produced by ``csv.DictReader`` and ``fieldnames`` is the
        list of header column names (``None`` when the file is empty).
    """
    # The 'U' mode flag was removed in Python 3.11 (open() raises
    # ValueError).  On Python 3 the csv module expects newline='' and then
    # recognises \n, \r\n and \r line endings itself; Python 2 still needs
    # 'rU' to get universal newlines.
    if sys.version_info[0] >= 3:
        open_kwargs = {'mode': 'r', 'newline': ''}
    else:
        open_kwargs = {'mode': 'rU'}
    with open(in_file, **open_kwargs) as f:
        reader = csv.DictReader(f, delimiter='\t')
        recs = list(reader)
        # Fetch the header while the file is still open: for an empty file
        # DictReader has nothing cached and tries to read from it again.
        fieldnames = reader.fieldnames
    return recs, fieldnames
43be74e62bfe Uploaded
ric
parents:
diff changeset
25
43be74e62bfe Uploaded
ric
parents:
diff changeset
def is_header_different(titer_plate_header, header_to_be_checked):
    """Tell whether a file header deviates from the expected columns.

    Column order and repeated names are ignored; only the set of names is
    compared.

    Args:
        titer_plate_header: iterable of the expected column names.
        header_to_be_checked: iterable of the column names found in the
            file, or ``None`` when the file has no header row.

    Returns:
        bool: ``True`` when the header is missing, lacks an expected column
        or contains an unexpected one; ``False`` when both hold exactly the
        same names.
    """
    # No header at all can never be compliant (and set(None) would raise).
    if header_to_be_checked is None:
        return True
    # Compare in both directions: a plain difference() of the file header
    # against the expected one only notices extra columns and lets a header
    # that is missing required columns through.
    return set(header_to_be_checked) != set(titer_plate_header)
43be74e62bfe Uploaded
ric
parents:
diff changeset
32
43be74e62bfe Uploaded
ric
parents:
diff changeset
def main(argv):
    """Run the TSV header check for the given command-line arguments.

    Args:
        argv: list of command-line arguments, without the program name.

    Side effects:
        Configures logging through ``logging.basicConfig``.  When the header
        of the input file is not compliant, writes a message to stderr and
        terminates the process with exit status 2.
    """
    parser = make_parser()
    args = parser.parse_args(argv)

    log_level = getattr(logging, args.loglevel)
    kwargs = {
        'format': LOG_FORMAT,
        'datefmt': LOG_DATEFMT,
        'level': log_level,
    }
    if args.logfile:
        kwargs['filename'] = args.logfile
    logging.basicConfig(**kwargs)
    logger = logging.getLogger('check tsv header')

    logger.info('Loading data from file %s', args.in_file)
    recs, file_header = read_samplesheet(args.in_file)
    logger.debug('Retrieved %d records', len(recs))

    titer_plate_header = ['study', 'label', 'barcode', 'rows', 'columns']

    # csv.DictReader reports no fieldnames (None) for an empty file; treat
    # that as a non-compliant header instead of passing None further down.
    if file_header is None or is_header_different(titer_plate_header,
                                                  file_header):
        # Logged as an error so the reason survives --loglevel WARNING/ERROR.
        logger.error("Tsv file header has to have this elements: %s",
                     titer_plate_header)
        # hack to support old galaxy
        sys.stderr.write(
            "Headers not compliant to: {}\n".format(titer_plate_header))
        sys.exit(2)

    logger.info("Header check passed")
43be74e62bfe Uploaded
ric
parents:
diff changeset
59
43be74e62bfe Uploaded
ric
parents:
diff changeset
if __name__ == '__main__':
    # Hand main() only the arguments, leaving out the script path in argv[0].
    cli_args = sys.argv[1:]
    main(cli_args)