Mercurial > repos > ric > test2
comparison galaxy-tools/biobank/utils/drop_flowcell_related_items.py @ 0:ba6cf6ede027 draft default tip
Uploaded
| author | ric |
|---|---|
| date | Wed, 28 Sep 2016 06:03:30 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:ba6cf6ede027 |
|---|---|
| 1 import argparse, sys | |
| 2 from collections import Counter | |
| 3 | |
| 4 from bl.vl.kb import KnowledgeBase as KB | |
| 5 import bl.vl.utils.ome_utils as vlu | |
| 6 from bl.vl.utils import get_logger, LOG_LEVELS | |
| 7 from bl.vl.kb.drivers.omero.sequencing import SeqDataSample, SequencerOutput | |
| 8 | |
| 9 | |
def make_parser():
    """Build the command-line parser for this script.

    The returned argparse.ArgumentParser accepts logging options
    (--logfile, --loglevel), OMERO connection options (-H/--host,
    -U/--user, -P/--passwd), the mandatory --flowcell-label and the
    --datasets-only switch.
    """
    cli = argparse.ArgumentParser(
        description='delete all items related to the given flowcell')
    add = cli.add_argument

    # Logging configuration
    add('--logfile', type=str, help='log file (default=stderr)')
    add('--loglevel', type=str, choices=LOG_LEVELS, default='INFO',
        help='logging level (default=INFO)')

    # OMERO connection details
    add('-H', '--host', type=str, help='omero hostname')
    add('-U', '--user', type=str, help='omero user')
    add('-P', '--passwd', type=str, help='omero password')

    # What to delete
    add('--flowcell-label', type=str, required=True,
        help='flowcell label')
    add('--datasets-only', action='store_true',
        help='delete only data samples and data objects related to the given flowcell')
    return cli
| 23 | |
| 24 | |
def get_sources(objs):
    """Return the set of ``action.target`` values found on the given items.

    Items for which ``item.action.target`` cannot be resolved (an
    AttributeError is raised somewhere along the chain) contribute nothing
    to the result.
    """
    found = set()
    remember = found.add
    for item in objs:
        try:
            remember(item.action.target)
        except AttributeError:
            # No "action" or no "target": this item has no source
            continue
    return found
| 34 | |
| 35 | |
def delete_objects(objs, kb, logger, max_retries=3):
    """Delete every object in ``objs`` through ``kb``, retrying on failure.

    ``objs`` must be a list; it is consumed in place and is empty when the
    function returns. For SeqDataSample and SequencerOutput items the
    attached data objects (``kb.get_data_objects``) are deleted first.
    After an object has been deleted, deletion of its ``action`` is
    attempted on a best-effort basis.

    When deleting an object fails it is appended to the end of the list
    and tried again later, up to ``max_retries`` additional attempts per
    object; after that the object is skipped.

    Only ``Exception`` subclasses are treated as deletion failures, so
    KeyboardInterrupt and SystemExit propagate instead of being retried.
    """
    retry_ct = Counter()
    while objs:
        o = objs.pop(0)
        # Exact type match (subclasses excluded), as in the original code.
        if type(o) in (SeqDataSample, SequencerOutput):
            logger.info('Loading DataObjects for %s:%s' % (o.__class__.__name__,
                                                           o.label))
            dobjs = kb.get_data_objects(o)
            logger.info('%d DataObjects loaded' % len(dobjs))
            for d in dobjs:
                logger.info('Deleting %s:%s' % (d.__class__.__name__,
                                                d.path))
                kb.delete(d)
        try:
            logger.info('Deleting %s:%s' % (o.__class__.__name__,
                                            o.id))
            # Grab the action before the object itself goes away.
            act = o.action
            kb.delete(o)
        except Exception as exc:
            logger.info('Can\'t delete, putting back into objects list')
            logger.debug('Deletion failed: %r' % (exc,))
            retry_key = '%s:%s' % (type(o), o.id)
            if retry_ct[retry_key] < max_retries:
                objs.append(o)
                retry_ct[retry_key] += 1
            else:
                logger.info('Reached maximum retry limit for the object, skipping')
            continue
        # The object is gone; removing its action is best-effort only.
        try:
            logger.info('Deleting source action %s:%s' % (act.__class__.__name__,
                                                          act.id))
            kb.delete(act)
        except Exception as exc:
            logger.info('Can\'t delete action')
            logger.debug('Action deletion failed: %r' % (exc,))
| 67 | |
def main(argv):
    """Delete the flowcell with the given label and the items related to it.

    ``argv`` is the list of command-line arguments without the program
    name. The function connects to OMERO, looks up the FlowCell by label,
    loads its Lanes, LaneSlots, source tubes, SequencerOutputs and
    SeqDataSamples, then deletes them with delete_objects(). With
    --datasets-only only the SeqDataSamples (and their data objects) are
    deleted.

    Exits with status 0 when no FlowCell matches the label, and through
    sys.exit(ve) when the OMERO connection parameters cannot be resolved.
    """
    parser = make_parser()
    args = parser.parse_args(argv)

    logger = get_logger('drop_flowcell_related_items', level=args.loglevel,
                        filename=args.logfile)

    try:
        host = args.host or vlu.ome_host()
        user = args.user or vlu.ome_user()
        passwd = args.passwd or vlu.ome_passwd()
    except ValueError as ve:
        # "as" form is valid on Python 2.6+ and Python 3
        logger.critical(ve)
        sys.exit(ve)

    kb = KB(driver='omero')(host, user, passwd)

    logger.info('Retrieving flowcell with label %s' % args.flowcell_label)
    query = 'SELECT fc FROM FlowCell fc WHERE fc.label = :fc_label'
    results = kb.find_all_by_query(query, {'fc_label' : args.flowcell_label})
    if len(results) == 0:
        logger.info('No FlowCell with label %s in the database' % args.flowcell_label)
        sys.exit(0)
    fc = results[0]
    logger.info('Loaded FlowCell with ID %s' % fc.id)
    logger.info('Loading related Lanes')
    lanes = list(kb.get_lanes_by_flowcell(fc))
    logger.info('%d Lanes loaded' % len(lanes))
    logger.info('Loading related LaneSlots')
    lane_slots = []
    for l in lanes:
        lane_slots += list(kb.get_laneslots_by_lane(l))
    logger.info('%d LaneSlots loaded' % len(lane_slots))
    logger.info('Loading related Tubes')
    # Two hops up the action chain: lane slot -> source -> source of source
    sub_samples = get_sources(lane_slots)
    samples = get_sources(sub_samples)
    logger.info('%d Tubes loaded' % (len(sub_samples) + len(samples)))
    logger.info('Loading related SequencerOutputs')
    seq_out = kb.dt.get_connected(fc, kb.SequencerOutput, kb.dt.DIRECTION_OUTGOING)
    logger.info('%d SequencerOutputs loaded' % len(seq_out))
    logger.info('Loading related SeqDataSamples')
    seq_dsamples = kb.dt.get_connected(fc, kb.SeqDataSample, kb.dt.DIRECTION_OUTGOING)
    logger.info('%d SeqDataSamples loaded' % len(seq_dsamples))

    # delete_objects() pops from and appends to the collection it is given,
    # so hand it real lists regardless of what the kb calls returned.
    if args.datasets_only:
        delete_items = [list(seq_dsamples)]
    else:
        delete_items = [list(seq_dsamples), list(seq_out), lane_slots, lanes,
                        [fc], list(sub_samples), list(samples)]
    for items in delete_items:
        delete_objects(items, kb, logger)

    logger.info('Job completed')
| 121 | |
| 122 | |
# Run the tool only when this file is executed as a script, handing main()
# the command-line arguments with the program name stripped.
if __name__ == '__main__':
    main(argv=sys.argv[1:])
