diff galaxy-tools/biobank/utils/check_tsv_header.py @ 3:43be74e62bfe draft

Uploaded
author ric
date Thu, 22 Sep 2016 08:57:04 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/check_tsv_header.py	Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,61 @@
+"""
+"""
+import csv, sys, argparse, logging
+
+
+# Record layout and timestamp format handed to logging.basicConfig in main().
+LOG_FORMAT = "%(asctime)s|%(levelname)-8s|%(message)s"
+LOG_DATEFMT = "%Y-%m-%d %H:%M:%S"
+# Values accepted by the --loglevel command-line option.
+LOG_LEVELS = [
+    "DEBUG",
+    "INFO",
+    "WARNING",
+    "ERROR",
+    "CRITICAL",
+]
+
+def make_parser():
+    """Build the command-line parser for the TSV header check.
+
+    Options:
+      --logfile      optional log destination (logging goes to stderr
+                     when omitted)
+      --loglevel     one of LOG_LEVELS, default 'INFO'
+      --in-file/-i   required path of the file whose header is checked
+
+    Returns the configured ``argparse.ArgumentParser``.
+    """
+    # The previous description ("prepare inputs for
+    # VLUTIL.prepare_seq_dsample_inputs") was left over from another tool
+    # and did not describe what this script does.
+    parser = argparse.ArgumentParser(
+        description='check the header of a tab-separated input file')
+    parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+    parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+                        help='logging level', default='INFO')
+    parser.add_argument('--in-file', '-i', type=str, required=True,
+                        help='input file')
+    return parser
+
+def read_samplesheet(in_file):
+    """Read a tab-separated file that starts with a header row.
+
+    Returns a ``(records, fieldnames)`` tuple: ``records`` is the list of
+    row mappings produced by ``csv.DictReader`` and ``fieldnames`` is the
+    header as parsed by the reader (``None`` when the file is empty).
+    """
+    # The 'U' mode flag was removed in Python 3.11 (open() raises
+    # ValueError), and on Python 3 the csv module expects files opened with
+    # newline=''.  Python 2's open() has no ``newline`` argument, so keep
+    # 'rU' there to retain universal-newline handling.
+    if sys.version_info[0] >= 3:
+        f = open(in_file, 'r', newline='')
+    else:
+        f = open(in_file, 'rU')
+    with f:
+        reader = csv.DictReader(f, delimiter='\t')
+        recs = list(reader)
+        # Read after consuming the rows so the header has been parsed.
+        fieldnames = reader.fieldnames
+    return recs, fieldnames
+
+def is_header_different(titer_plate_header, header_to_be_checked):
+    """Tell whether a header is not compliant with the reference header.
+
+    Returns ``True`` when ``header_to_be_checked`` is missing or empty, or
+    when it contains at least one column name that does not appear in
+    ``titer_plate_header``; returns ``False`` otherwise.
+    """
+    # csv.DictReader.fieldnames is None for an empty file: set(None) would
+    # raise TypeError, and a file without a header cannot be compliant.
+    if not header_to_be_checked:
+        return True
+    # NOTE(review): only unexpected columns are detected; reference columns
+    # that are absent from the checked header are not reported -- confirm
+    # this one-directional check is intended.
+    unexpected = set(header_to_be_checked) - set(titer_plate_header)
+    return bool(unexpected)
+
+def main(argv):
+    """Check the header of the TSV file named on the command line.
+
+    Parses ``argv`` (without the program name), configures logging, loads
+    the file and compares its header with the expected titer plate columns.
+    When the header is missing or not compliant, a message is written to
+    stderr and the process exits with status 2.
+    """
+    parser = make_parser()
+    args = parser.parse_args(argv)
+
+    log_config = {
+        'format': LOG_FORMAT,
+        'datefmt': LOG_DATEFMT,
+        'level': getattr(logging, args.loglevel),
+    }
+    # Without a filename, basicConfig sends records to stderr.
+    if args.logfile:
+        log_config['filename'] = args.logfile
+    logging.basicConfig(**log_config)
+    logger = logging.getLogger('check tsv header')
+
+    logger.info('Loading data from file %s', args.in_file)
+    recs, file_header = read_samplesheet(args.in_file)
+    logger.debug('Retrieved %d records', len(recs))
+
+    titer_plate_header = ['study', 'label', 'barcode', 'rows', 'columns']
+
+    # A file with no header row at all (e.g. an empty file) is reported as
+    # not compliant instead of failing inside the comparison with a
+    # TypeError.
+    if not file_header or is_header_different(titer_plate_header, file_header):
+        # Logged at ERROR so the reason is kept even with a stricter
+        # --loglevel than INFO.
+        logger.error("Tsv file header has to have this elements: {}".format(titer_plate_header))
+        # hack to support old galaxy
+        sys.stderr.write("Headers not compliant to: {}".format(titer_plate_header))
+        sys.exit(2)
+
+    logger.info("Header check passed")
+
+# Run the check only when executed as a script; the program name is
+# dropped from the argument list before it is handed to main().
+if __name__ == "__main__":
+    main(sys.argv[1:])