diff galaxy-tools/biobank/utils/from_well_to_illumina_measures.py @ 3:43be74e62bfe draft

Uploaded
author ric
date Thu, 22 Sep 2016 08:57:04 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-tools/biobank/utils/from_well_to_illumina_measures.py	Thu Sep 22 08:57:04 2016 -0400
@@ -0,0 +1,95 @@
+import sys, argparse, csv
+
+from bl.vl.kb import KnowledgeBase as KB
+from bl.vl.utils import get_logger, LOG_LEVELS
+import bl.vl.utils.ome_utils as vlu
+from bl.vl.graph.drivers.neo4j import Neo4JDriver
+
+
+def make_parser():
+    """Build the command-line parser for the well -> Illumina measures mapper.
+
+    The returned ``argparse.ArgumentParser`` exposes:
+
+    * ``--logfile`` / ``--loglevel``: logging destination and level
+      (level must be one of ``LOG_LEVELS``, default ``INFO``);
+    * ``-H/--host``, ``-U/--user``, ``-P/--passwd``: OMERO connection details;
+    * ``--in_file`` / ``--out_file``: required input and output paths;
+    * ``--well_column``: label of the column to map (default ``source``).
+    """
+    parser = argparse.ArgumentParser(description='map wells label to illumina bead chip measures')
+    parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
+    parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
+                        help='logging level (default=INFO)', default='INFO')
+    parser.add_argument('-H', '--host', type=str, help='OMERO server hostname')
+    parser.add_argument('-U', '--user', type=str, help='OMERO user')
+    parser.add_argument('-P', '--passwd', type=str, help='OMERO password')
+    parser.add_argument('--in_file', type=str, help='input file',
+                        required=True)
+    parser.add_argument('--out_file', type=str, help='output file',
+                        required=True)
+    parser.add_argument('--well_column', type=str, help='label of the column that will be mapped',
+                        default='source')
+    return parser
+
+
+def get_wells_map(plate, kb, logger):
+    """Return a ``{well label: well}`` dict for the wells of ``plate``.
+
+    Wells are taken from ``kb.get_wells_by_plate(plate)``; if two wells share
+    a label, the one yielded last is kept. Progress is reported at INFO level
+    through ``logger``.
+    """
+    logger.info('Mapping wells for plate %s', plate.label)
+    label_to_well = {well.label: well for well in kb.get_wells_by_plate(plate)}
+    logger.info('Mapped %d wells', len(label_to_well))
+    return label_to_well
+
+
+def get_plates_map(plates_list, kb, logger):
+    """Return ``{plate barcode: wells map}`` for the requested plates.
+
+    Every object returned by ``kb.get_objects(kb.TiterPlate)`` is inspected;
+    only instances of ``kb.TiterPlate`` whose barcode is contained in
+    ``plates_list`` are kept. Each kept plate is expanded into its wells map
+    with ``get_wells_map``.
+    """
+    logger.info('Loading TiterPlates map')
+    wells_by_barcode = {}
+    for plate in kb.get_objects(kb.TiterPlate):
+        # Guard clauses: discard anything that is not a requested TiterPlate.
+        if not isinstance(plate, kb.TiterPlate):
+            continue
+        if plate.barcode not in plates_list:
+            continue
+        wells_by_barcode[plate.barcode] = get_wells_map(plate, kb, logger)
+    logger.info('Mapped %d plates', len(wells_by_barcode))
+    return wells_by_barcode
+
+
+def get_connected_illumina_measures(well, kb, logger):
+    """Return the IlluminaBeadChipMeasures objects connected to ``well``.
+
+    The lookup is delegated to ``kb.dt.get_connected``, restricted to the
+    ``kb.IlluminaBeadChipMeasures`` class and to the outgoing direction.
+    """
+    logger.debug('Loading connected IlluminaBeadChipMeasures for well %s:%s', well.label,
+                 well.container.label)
+    measure_class = kb.IlluminaBeadChipMeasures
+    outgoing = Neo4JDriver.DIRECTION_OUTGOING
+    return kb.dt.get_connected(well, aklass=measure_class, direction=outgoing)
+
+
+def wells_to_illumina(in_file, out_file, column_label, kb, logger):
+    """Rewrite a tab-separated file, replacing well labels with measure labels.
+
+    ``in_file`` is read as a tab-separated table with a header row. The
+    ``column_label`` column must hold values of the form
+    ``<plate barcode>:<well label>``. For each row the well is resolved
+    through the knowledge base ``kb`` and, when exactly one
+    IlluminaBeadChipMeasures object is connected to it, the column value is
+    replaced with that object's label and the row is written to ``out_file``
+    (same columns, tab-separated, with header).
+
+    Rows that cannot be mapped are skipped with a warning instead of
+    aborting the whole run. This covers:
+
+    * a value that is not exactly ``barcode:well``;
+    * a plate barcode or well label that was not found in ``kb``;
+    * a well with zero or more than one connected measure.
+
+    Raises ``KeyError`` if ``column_label`` is not a column of a non-empty
+    input file.
+    """
+    with open(in_file) as ifile, open(out_file, 'w') as ofile:
+        reader = csv.DictReader(ifile, delimiter='\t')
+        # The records are needed twice (barcode collection, then mapping),
+        # so the whole input is loaded in memory.
+        in_records = list(reader)
+        plates_barcodes = {x[column_label].split(':')[0] for x in in_records}
+        plates_map = get_plates_map(plates_barcodes, kb, logger)
+        writer = csv.DictWriter(ofile, reader.fieldnames, delimiter='\t')
+        writer.writeheader()
+        logger.info('Mapping wells to illumina bead chip measures')
+        for rec in in_records:
+            label = rec[column_label]
+            try:
+                barcode, well = label.split(':')
+            except ValueError:
+                # Zero or more than one ':' separator: cannot tell plate from well.
+                logger.warning('Malformed well label %r, skipping line', label)
+                continue
+            try:
+                kb_well = plates_map[barcode][well]
+            except KeyError:
+                # Either the plate or the well is unknown to the knowledge base.
+                logger.warning('Well %s:%s not found, skipping line', barcode, well)
+                continue
+            measures = get_connected_illumina_measures(kb_well, kb, logger)
+            if len(measures) != 1:
+                logger.warning('Found %d measures for well %s:%s, skipping line', len(measures),
+                               barcode, well)
+                continue
+            rec[column_label] = measures[0].label
+            writer.writerow(rec)
+
+
+def main(argv):
+    """Run the tool with the given command-line arguments.
+
+    ``argv`` is the argument list without the program name. The function
+    parses it, sets up the logger, resolves the OMERO connection parameters,
+    opens the knowledge base and runs ``wells_to_illumina``.
+
+    Connection parameters missing from the command line are obtained from
+    ``vlu.ome_host()``, ``vlu.ome_user()`` and ``vlu.ome_passwd()``. If
+    resolving them raises ``ValueError``, the error is logged at CRITICAL
+    level and the process exits via ``sys.exit``.
+    """
+    parser = make_parser()
+    args = parser.parse_args(argv)
+
+    logger = get_logger('from_well_to_illumina_measures', level=args.loglevel,
+                        filename=args.logfile)
+
+    try:
+        host = args.host or vlu.ome_host()
+        user = args.user or vlu.ome_user()
+        passwd = args.passwd or vlu.ome_passwd()
+    except ValueError as ve:
+        # 'except X as e' is accepted by Python 2.6+ and is the only form
+        # accepted by Python 3; the old comma form is a syntax error there.
+        logger.critical(ve)
+        sys.exit(ve)
+
+    kb = KB(driver='omero')(host, user, passwd)
+    wells_to_illumina(args.in_file, args.out_file, args.well_column,
+                      kb, logger)
+    logger.info('Job completed')
+
+
+# Script entry point: run main() with the command-line arguments, excluding
+# the program name, only when the module is executed directly.
+if __name__ == '__main__':
+    main(sys.argv[1:])