Mercurial > repos > ric > test2
view galaxy-tools/biobank/utils/wells_barcode_to_label.py @ 0:ba6cf6ede027 draft default tip
Uploaded
| author | ric |
|---|---|
| date | Wed, 28 Sep 2016 06:03:30 -0400 |
| parents | |
| children |
line wrap: on
line source
"""
Map PlateWell labels written as PLATE_BARCODE:WELL_LABEL to labels
written as PLATE_LABEL:WELL_LABEL, which is the PlateWell label format
required by the map_vid application.

The inputs are a TSV file and the label of the column of this file
containing the PlateWell labels that are going to be mapped.
"""

import argparse
import copy
import csv
import sys

from bl.vl.kb import KnowledgeBase as KB
from bl.vl.utils import LOG_LEVELS, get_logger
import bl.vl.utils.ome_utils as vlu


def get_wells_map(kb, plate_barcodes, logger):
    """
    Build the PLATE_BARCODE:WELL_LABEL -> PLATE_LABEL:WELL_LABEL lookup.

    kb             -- knowledge base proxy; must provide ``TiterPlate``,
                      ``get_by_field`` and ``get_wells_by_plate``
    plate_barcodes -- barcodes of the plates whose wells must be mapped
    logger         -- logger used for progress and debug messages

    Returns a dict with one entry for every well of every retrieved
    object whose OME_TABLE is 'TiterPlate' and whose barcode is set.
    """
    wells_map = {}
    logger.info('Start building PlateWells map')
    # NOTE(review): the result is assumed to be a dict-like object keyed
    # by barcode (the original code iterated it with iteritems()).
    res = kb.get_by_field(kb.TiterPlate, 'barcode', plate_barcodes)
    logger.debug('Plates %r --- Results: %r', plate_barcodes, res)
    # .values() works on both Python 2 and Python 3 mappings, unlike the
    # Python-2-only .iteritems(); the keys were never used anyway.
    for pl in res.values():
        if pl.OME_TABLE != 'TiterPlate':
            logger.debug('Object is a %r, skipping it', pl.OME_TABLE)
            continue
        if not pl.barcode:
            logger.debug('TiterPlate %s has no barcode', pl.label)
            continue
        for w in kb.get_wells_by_plate(pl):
            logger.debug('Mapping well %s of plate %s', w.label,
                         w.container.label)
            barcode_key = '%s:%s' % (w.container.barcode, w.label)
            wells_map[barcode_key] = '%s:%s' % (w.container.label, w.label)
    logger.info('Mapped %d PlateWells', len(wells_map))
    return wells_map


def get_plates_list(records, plates_column, logger):
    """
    Collect the distinct plate barcodes referenced by the input records.

    records       -- list of row dicts read from the input TSV
    plates_column -- key of the column holding PLATE_BARCODE:WELL_LABEL
    logger        -- logger used for progress messages

    Returns a list (unordered, no duplicates) with the text found before
    the first ':' of each value.  Rows whose value is missing or empty
    are ignored instead of crashing on ``None.split`` or producing an
    empty barcode.
    """
    logger.info('Retrieving TiterPlate barcodes from %d records',
                len(records))
    plates = set()
    for r in records:
        value = r[plates_column]
        if not value:
            # csv.DictReader fills the columns of a short row with None
            continue
        plates.add(value.split(':')[0])
    logger.info('Found %d TiterPlate objects', len(plates))
    return list(plates)


def make_parser():
    """Return the argparse parser for the command line of this tool."""
    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, so passing it positionally
    # replaced the program name in the usage line.
    parser = argparse.ArgumentParser(
        description='Map barcodes in PlateWell labels to TiterPlate labels'
    )
    parser.add_argument('--logfile', type=str,
                        help='log file (default=stderr)')
    parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
                        help='logging level', default='INFO')
    parser.add_argument('-H', '--host', type=str, help='OMERO host')
    parser.add_argument('-U', '--user', type=str, help='OMERO user')
    parser.add_argument('-P', '--passwd', type=str, help='OMERO password')
    parser.add_argument('--in-file', type=str, required=True,
                        help='input TSV file')
    parser.add_argument('--column-label', type=str, required=True,
                        help='the label of the columun containing the values that will be mapped')
    parser.add_argument('--out-file', type=str, required=True,
                        help='output TSV file')
    parser.add_argument('--strict-mapping', action='store_true',
                        help='if output records are less than the input ones, raise an error')
    return parser


def _map_records(records, column_label, wells_map, logger):
    """
    Return copies of the records whose ``column_label`` value is a key of
    ``wells_map``, with that value replaced by the mapped label.

    Records whose value has no mapping are left out of the result; the
    input records are never modified.
    """
    mapped_records = []
    for rec in records:
        mapped = copy.deepcopy(rec)
        logger.debug('Mapping value %s', mapped[column_label])
        if mapped[column_label] in wells_map:
            mapped[column_label] = wells_map[mapped[column_label]]
            mapped_records.append(mapped)
    return mapped_records


def main(argv):
    """
    Run the tool.

    argv -- command line arguments, without the program name

    Reads the input TSV, replaces the values of the selected column using
    the map built from the knowledge base and writes the mapped records
    to the output TSV.

    Raises RuntimeError if the input file has no column with the given
    label.  Exits through sys.exit if the OMERO connection parameters
    cannot be resolved, or if --strict-mapping is set and some records
    could not be mapped.
    """
    parser = make_parser()
    args = parser.parse_args(argv)

    logger = get_logger('wells_barcode_to_label', level=args.loglevel,
                        filename=args.logfile)

    try:
        host = args.host or vlu.ome_host()
        user = args.user or vlu.ome_user()
        passwd = args.passwd or vlu.ome_passwd()
    except ValueError as ve:
        logger.critical(ve)
        sys.exit(ve)

    logger.info('Starting job')

    kb = KB(driver='omero')(host, user, passwd)

    # Read and validate the whole input before touching the output file,
    # so that a failure never truncates an existing output or leaves a
    # header-only file behind.
    with open(args.in_file) as in_file:
        reader = csv.DictReader(in_file, delimiter='\t')
        # fieldnames is None when the input file is empty
        fieldnames = reader.fieldnames or []
        if args.column_label not in fieldnames:
            msg = 'No column %s in file %s' % (args.column_label,
                                               args.in_file)
            logger.critical(msg)
            raise RuntimeError(msg)
        records = list(reader)

    plates = get_plates_list(records, args.column_label, logger)
    wells_map = get_wells_map(kb, plates, logger)

    logger.info('Mapping %d records', len(records))
    mapped_records = _map_records(records, args.column_label, wells_map,
                                  logger)
    if args.strict_mapping and len(mapped_records) < len(records):
        msg = 'Mapped %d record of %d' % (len(mapped_records), len(records))
        logger.critical(msg)
        sys.exit(msg)
    logger.info('%d records mapped', len(mapped_records))

    with open(args.out_file, 'w') as out_file:
        writer = csv.DictWriter(out_file, fieldnames, delimiter='\t')
        writer.writeheader()
        writer.writerows(mapped_records)
    logger.info('Job completed')


if __name__ == '__main__':
    main(sys.argv[1:])
