3
|
1 import sys, csv, argparse, logging, os
|
|
2 from collections import Counter
|
|
3
|
|
4 from bl.vl.kb import KnowledgeBase as KB
|
|
5 import bl.vl.utils.ome_utils as vlu
|
|
6 from bl.vl.utils import get_logger, LOG_LEVELS
|
|
7
|
|
8
|
|
def make_parser():
    """Build the command-line parser for the update_parents data checker.

    The parser accepts logging options (--logfile, --loglevel), the OMERO
    connection options (-H/--host, -U/--user, -P/--passwd) and the two
    mandatory file options (--in_file, --out_file).

    Returns:
        The configured argparse.ArgumentParser.
    """
    cli = argparse.ArgumentParser(
        description='check data that will be passed to the update_parents tool'
    )

    # Logging configuration.
    cli.add_argument('--logfile', type=str, help='log file (default=stderr)')
    cli.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
                     default='INFO', help='logging level (default=INFO)')

    # OMERO connection settings; all optional on the command line.
    connection_options = (
        ('-H', '--host', 'omero hostname'),
        ('-U', '--user', 'omero user'),
        ('-P', '--passwd', 'omero password'),
    )
    for short_flag, long_flag, help_text in connection_options:
        cli.add_argument(short_flag, long_flag, type=str, help=help_text)

    # Mandatory input and output paths.
    file_options = (
        ('--in_file', 'input file'),
        ('--out_file', 'output file'),
    )
    for flag, help_text in file_options:
        cli.add_argument(flag, type=str, required=True, help=help_text)

    return cli
|
|
22
|
|
23
|
|
def check_row(row, individuals_map, kb, logger):
    """Validate one input record against the preloaded individuals.

    The record's 'individual' value must be a key of individuals_map. The
    'father' and 'mother' values are each either the literal string 'None'
    (no check performed) or the ID of a known individual whose gender
    matches kb.Gender.MALE / kb.Gender.FEMALE respectively.

    Args:
        row: mapping with the 'individual', 'father' and 'mother' keys.
        individuals_map: mapping from individual ID to individual object.
        kb: object exposing Gender.MALE and Gender.FEMALE; it is only
            accessed when a parent ID is actually given.
        logger: logger that receives progress and rejection messages.

    Returns:
        True when every check passes, False when the row must be rejected
        (the reason is logged at error level).
    """
    logger.debug('Checking record %r', row)
    try:
        individual = individuals_map[row['individual']]
        logger.info('%s is a valid Individual ID', individual.id)
        # Father and mother go through the same steps; only the column
        # and the expected gender differ.
        for column, gender_name in (('father', 'MALE'), ('mother', 'FEMALE')):
            parent_id = row[column]
            if parent_id == 'None':
                logger.info('None value, no check required')
                continue
            parent = individuals_map[parent_id]
            logger.info('%s is a valid Individual ID', parent.id)
            check_gender(parent, getattr(kb.Gender, gender_name))
            logger.info('Gender check passed')
    except KeyError as ke:
        # NOTE: a column missing from row raises KeyError as well, so it is
        # reported through this same "not a valid Individual ID" message.
        logger.error('%s is not a valid Individual ID, rejecting row', ke)
        return False
    except ValueError as ve:
        # Raised by check_gender on a gender mismatch.
        logger.error(ve)
        return False
    return True
|
|
50
|
|
51
|
|
def check_gender(individual, gender):
    """Ensure that an individual's gender label equals the expected one.

    Args:
        individual: object with an `id` attribute and a `gender` attribute
            that provides enum_label().
        gender: expected gender, an object that provides enum_label().

    Raises:
        ValueError: if the two enum labels differ.
    """
    actual_label = individual.gender.enum_label()
    expected_label = gender.enum_label()
    if actual_label == expected_label:
        return
    details = (individual.id, actual_label, expected_label)
    raise ValueError(
        'Gender for individual %s is %s, expected %s, rejecting row' % details
    )
|
|
59
|
|
60
|
|
def _drop_duplicated_individuals(records, logger):
    """Return the records whose 'individual' value occurs exactly once.

    Every record that shares its 'individual' value with another record is
    logged and discarded. The surviving records keep their input order.
    """
    recs_by_ind = {}
    for rec in records:
        recs_by_ind.setdefault(rec['individual'], []).append(rec)
    for ind_id, recs in recs_by_ind.items():
        if len(recs) > 1:
            logger.info('Individual %s is a duplicated', ind_id)
            for rec in recs:
                logger.info('Removing record %r', rec)
    return [rec for rec in records
            if len(recs_by_ind[rec['individual']]) == 1]


def main(argv):
    """Check an update_parents input file and write the valid rows.

    Steps:
      1. Parse argv and set up the logger.
      2. Resolve the OMERO host/user/password from the command line,
         falling back to vlu.ome_host()/ome_user()/ome_passwd(); a
         ValueError raised there is logged and terminates the script.
      3. Load every kb.Individual and index them by `id`.
      4. Read the tab-separated input file, discard all records whose
         'individual' value appears more than once, and write the records
         accepted by check_row to the output file using the input header.

    Args:
        argv: list of command-line arguments, program name excluded.
    """
    parser = make_parser()
    args = parser.parse_args(argv)

    logger = get_logger('check_update_parents_data', level=args.loglevel,
                        filename=args.logfile)

    try:
        host = args.host or vlu.ome_host()
        user = args.user or vlu.ome_user()
        passwd = args.passwd or vlu.ome_passwd()
    except ValueError as ve:
        logger.critical(ve)
        sys.exit(ve)

    kb = KB(driver='omero')(host, user, passwd)

    logger.info('Preloading all individuals from the system')
    inds = kb.get_objects(kb.Individual)
    logger.info('%d individuals loaded', len(inds))
    inds_lookup = dict((ind.id, ind) for ind in inds)

    with open(args.in_file) as infile, open(args.out_file, 'w') as outfile:
        reader = csv.DictReader(infile, delimiter='\t')
        records = list(reader)
        logger.info('Check for duplicated in \'individual\' column')
        good_records = _drop_duplicated_individuals(records, logger)
        logger.info('Duplicated check completed')
        writer = csv.DictWriter(outfile, reader.fieldnames, delimiter='\t')
        writer.writeheader()
        logger.info('Checking records')
        for row in good_records:
            if check_row(row, inds_lookup, kb, logger):
                writer.writerow(row)
                logger.debug('Record %r written in output file', row)
        logger.info('Records check completed')
|
|
111
|
|
# Script entry point: hand the command-line arguments (program name
# excluded) to main().
if __name__ == '__main__':
    main(argv=sys.argv[1:])
|