3
|
1 """
|
|
Prepare a TSV file to be imported, containing a study code for each individual
not present in a specified study.
|
|
4
|
|
A second study can also be specified: every individual enrolled in it will
be ignored.
|
|
7
|
|
The report file contains the enrollment codes in the other studies.
|
|
9
|
|
Codes are short hashes of numbers, generated using Hashids (hashids.org) with
the study label as the salt parameter.
|
|
12
|
|
13 ex:
|
|
14 source study label
|
|
15 V03CB1DB357B274B17B139EA56A2FFA19E AUTOIMMUNITY ORVL5KMK5
|
|
16 V0BA695C2E326F4C13AD7F6052BB20539B AUTOIMMUNITY 9R0M2E12N
|
|
17 V067C445E35DA04ECCA21FA3E2DF3BBCF6 AUTOIMMUNITY QGZLQJ1RV
|
|
18 ...
|
|
19
|
|
20 """
|
|
21
|
|
22 import argparse
|
|
23 import csv
|
|
24 import string
|
|
25 import sys
|
|
26
|
|
27 from hashids import Hashids
|
|
28 from bl.vl.kb import KnowledgeBase as KB
|
|
29 from bl.vl.utils import LOG_LEVELS, get_logger
|
|
30 import bl.vl.utils.ome_utils as vlu
|
|
31
|
|
32
|
|
def make_parser():
    """Build the command-line argument parser for this script.

    ``--study`` and ``--ofile`` are mandatory.  ``--loglevel`` defaults to
    ``INFO`` and ``--reportfile`` to ``report.tsv``; every other option is
    left as ``None`` when it is not given on the command line.
    """
    cli = argparse.ArgumentParser(
        description='Retrieve all individuals not enrolled in the specified project'
    )
    # (flag, extra keyword arguments) pairs, registered in display order.
    # Every option is parsed as a plain string.
    options = (
        ('--logfile', dict(help='log file (default=stderr)')),
        ('--loglevel', dict(choices=LOG_LEVELS, help='logger level',
                            default='INFO')),
        ('--study', dict(help='Study label', required=True)),
        ('--study_to_be_ignored', dict(help='Study label to be ignored')),
        ('--host', dict(help='Omero hostname')),
        ('--user', dict(help='Omero user')),
        ('--passwd', dict(help='Omero password')),
        ('--ofile', dict(help='output file path', required=True)),
        ('--reportfile', dict(help='report file', default='report.tsv')),
    )
    for flag, extra in options:
        cli.add_argument(flag, type=str, **extra)
    return cli
|
|
49
|
|
50
|
|
def init_hashids(study):
    """Return a ``Hashids`` instance salted with the given study label.

    The instance is configured with ``min_length=9`` and an alphabet made of
    the uppercase ASCII letters followed by the decimal digits.
    """
    code_alphabet = string.ascii_uppercase + string.digits
    return Hashids(salt=study, min_length=9, alphabet=code_alphabet)
|
|
55
|
|
56
|
|
def write_csv_to_be_enrolled(logger, hashids, path, inds_map,
                             highest_id=0):
    """Write the tab-separated file of individuals to be enrolled.

    Each value of ``inds_map`` is a row dictionary (``source`` and ``study``
    keys are expected by the header).  A ``label`` entry is added to every
    row, in place, by encoding a progressive number that starts at
    ``highest_id + 1``.

    :param logger: object exposing a ``debug`` method
    :param hashids: object exposing ``encrypt(number)`` returning the code
    :param path: destination file path (overwritten if it exists)
    :param inds_map: mapping whose values are the row dictionaries
    :param highest_id: last number already in use; new codes start after it
    """
    csv_header = ['source', 'study', 'label']

    logger.debug('Writing CSV file %s' % path)
    with open(path, 'w') as f:
        writer = csv.DictWriter(f, csv_header,
                                delimiter='\t', quotechar='"',
                                restval='None')
        writer.writeheader()
        # values() works on both Python 2 and 3 (iteritems() is 2-only) and
        # the keys are not needed; enumerate replaces the manual counter.
        for study_id, row in enumerate(inds_map.values(), highest_id + 1):
            row['label'] = hashids.encrypt(study_id)
            writer.writerow(row)
|
|
74
|
|
75
|
|
def write_csv_enrollment_codes(logger, filename, csv_header, enrolls_map):
    """Write the tab-separated report of enrollment codes.

    One row is written for each value of ``enrolls_map``; columns listed in
    ``csv_header`` but missing from a row are filled with the string
    ``'None'``.

    :param logger: object exposing a ``debug`` method
    :param filename: destination file path (overwritten if it exists)
    :param csv_header: ordered list of column names
    :param enrolls_map: mapping whose values are the row dictionaries
    """
    logger.debug('Writing CSV file %s' % filename)
    with open(filename, 'w') as f:
        writer = csv.DictWriter(f, csv_header,
                                delimiter='\t', quotechar='"',
                                restval='None')
        writer.writeheader()
        # Only the row dictionaries are needed; values() is portable across
        # Python 2 and 3, unlike iteritems().
        writer.writerows(enrolls_map.values())
|
|
86
|
|
87
|
|
def get_enrollments_codes(logger, kb, inds_map):
    """Collect, study by study, the enrollment codes of the individuals
    listed in ``inds_map``.

    Returns a ``(csv_header, enrolls_map)`` pair.  ``csv_header`` starts with
    ``'individual_uuid'`` followed by the label of every study that has at
    least one enrollment.  ``enrolls_map`` is keyed by the individual's
    ``omero_id`` and each value maps ``'individual_uuid'`` to the
    individual's ``id`` and every study label to the matching study code.
    """
    all_studies = kb.get_objects(kb.Study)
    logger.info('Retrieved %d studies from database' % len(all_studies))

    header = ['individual_uuid']
    codes_by_individual = {}
    for study_obj in all_studies:
        logger.info('Retrieving enrollments for study %s' % study_obj.label)
        enrollments = kb.get_enrolled(study_obj)
        logger.info('%s enrollments retrieved' % len(enrollments))

        if len(enrollments) <= 0:
            logger.debug('No enrollments found, skip study %s'
                         % study_obj.label)
            continue

        logger.debug('Building lookup dictionary....')
        # Studies with enrollments get a column even when none of the
        # requested individuals is enrolled in them.
        header.append(study_obj.label)
        for enrollment in enrollments:
            if enrollment.individual.id not in inds_map:
                continue
            row = codes_by_individual.setdefault(
                enrollment.individual.omero_id, {})
            row['individual_uuid'] = enrollment.individual.id
            row[study_obj.label] = enrollment.studyCode

    return header, codes_by_individual
|
|
114
|
|
115
|
|
def main(argv):
    """Entry point: compute the individuals to enroll and write both files.

    Steps:

    1. parse ``argv`` and resolve the Omero connection parameters (command
       line first, then the ``vlu.ome_*`` helpers);
    2. load every individual and drop those already enrolled in ``--study``
       and, when given and found, in ``--study_to_be_ignored``;
    3. write the enrollment TSV (``--ofile``), numbering the new codes after
       the highest number decoded from the existing study codes;
    4. write the report (``--reportfile``) with the codes the remaining
       individuals have in the other studies.

    :param argv: list of command-line arguments, without the program name
    """
    parser = make_parser()
    args = parser.parse_args(argv)

    logger = get_logger('inds_not_enrolled', level=args.loglevel,
                        filename=args.logfile)
    try:
        host = args.host or vlu.ome_host()
        user = args.user or vlu.ome_user()
        passwd = args.passwd or vlu.ome_passwd()
    except ValueError as ve:
        logger.critical(ve)
        sys.exit(ve)

    out_file_path = args.ofile

    kb = KB(driver='omero')(host, user, passwd)

    inds = kb.get_objects(kb.Individual)
    logger.info('Retrieved {} individuals'.format(len(inds)))

    # One output row per individual, keyed by the individual id.
    inds_map = {}
    for i in inds:
        inds_map[i.id] = {'source': i.id, 'study': args.study}

    study = kb.get_by_label(kb.Study, args.study)
    if study:
        logger.info('{} present in the database'.format(study.label))
    else:
        logger.critical('{} not present in the database'.format(args.study))
        # A missing study is an error: exit with a non-zero status.
        sys.exit(1)

    hashids = init_hashids(study.label)
    enrolls = kb.get_enrolled(study)
    logger.info("{} enrollments founded in {}".format(len(enrolls),
                                                      study.label))
    highest_id = 0

    for e in enrolls:
        inds_map.pop(e.individual.id, None)
        # decrypt() returns a tuple of numbers, empty when the code cannot
        # be decoded, so compare its first element and skip empty results.
        decoded = hashids.decrypt(e.studyCode)
        if decoded and decoded[0] > highest_id:
            highest_id = decoded[0]

    ignored_study = None
    if args.study_to_be_ignored:
        ignored_study = kb.get_by_label(kb.Study, args.study_to_be_ignored)
        if not ignored_study:
            logger.warning('{} not present in the database, nothing to '
                           'ignore'.format(args.study_to_be_ignored))

    if ignored_study:
        enr = kb.get_enrolled(ignored_study)
        logger.info('Retrieved {} enrollments from {}'.format(
            len(enr), args.study_to_be_ignored))
        for e in enr:
            inds_map.pop(e.individual.id, None)

    logger.info('{} individuals to be enrolled'.format(len(inds_map)))

    write_csv_to_be_enrolled(logger, hashids, out_file_path, inds_map,
                             highest_id)

    csv_header, enrolls_map = get_enrollments_codes(logger, kb, inds_map)
    write_csv_enrollment_codes(logger, args.reportfile, csv_header,
                               enrolls_map)
|
|
185
|
|
# Run as a script: hand the command-line arguments (program name excluded)
# to main().
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    main(cli_args)
|