|
0
|
1 # BEGIN_COPYRIGHT
|
|
|
2 # END_COPYRIGHT
|
|
|
3
|
|
|
4 """
|
|
|
5 A rough example of basic pedigree info generation.
|
|
|
6 """
|
|
|
7
|
|
|
8 import argparse
|
|
|
9 import collections
|
|
|
10 import csv
|
|
|
11 import os
|
|
|
12 import sys
|
|
|
13 import yaml
|
|
|
14
|
|
|
15 from bl.vl.kb import KnowledgeBase as KB
|
|
|
16 from bl.vl.kb.drivers.omero.ehr import EHR
|
|
|
17 import bl.vl.individual.pedigree as ped
|
|
|
18 import bl.vl.utils.ome_utils as vlu
|
|
|
19 from bl.vl.utils import LOG_LEVELS, get_logger
|
|
|
20
|
|
|
21 PLINK_MISSING = -9
|
|
|
22 PLINK_UNAFFECTED = 1
|
|
|
23 PLINK_AFFECTED = 2
|
|
|
24
|
|
|
25 FIELDS = ["fam_label", "ind_label", "fat_label", "mot_label", "gender"]
|
|
|
26
|
|
|
27
|
|
|
28 def load_config(config_file):
|
|
|
29 with open(config_file) as cfg:
|
|
|
30 conf = yaml.load(cfg)
|
|
|
31 return conf
|
|
|
32
|
|
|
33
|
|
|
34 class Diagnosis(object):
|
|
|
35 def __init__(self, logger, yaml_file):
|
|
|
36 self.logger = logger
|
|
|
37 if os.path.isfile(yaml_file):
|
|
|
38 self.conf = load_config(yaml_file)
|
|
|
39 else:
|
|
|
40 self.logger.critical('The config file {} does not exists'.format(
|
|
|
41 yaml_file))
|
|
|
42 sys.exit()
|
|
|
43
|
|
|
44 def get_openehr_data(self):
|
|
|
45 return self.conf['openEHR']['archetype'], self.conf['openEHR']['field']
|
|
|
46
|
|
|
47 def get_diagnosis_label(self):
|
|
|
48 return [l for l in self.get_diagnosis().iterkeys()]
|
|
|
49
|
|
|
50 def get_diagnosis(self):
|
|
|
51 results = collections.OrderedDict()
|
|
|
52 diagnosis = collections.OrderedDict(sorted(
|
|
|
53 self.conf['diagnosis'].items()))
|
|
|
54 for v in diagnosis.itervalues():
|
|
|
55 results[v['label']] = v['values']
|
|
|
56 return results
|
|
|
57
|
|
|
58 def get_unaffected_diagnosis_dictionary(self):
|
|
|
59 labels = self.get_diagnosis_label()
|
|
|
60 d = {}
|
|
|
61 for k in labels:
|
|
|
62 d[k] = 1
|
|
|
63 return d
|
|
|
64
|
|
|
65
|
|
|
66 def make_parser():
|
|
|
67 parser = argparse.ArgumentParser(
|
|
|
68 description='build the first columns of a ped file from VL')
|
|
|
69 parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
|
|
|
70 parser.add_argument('--loglevel', type=str, choices=LOG_LEVELS,
|
|
|
71 help='logging level', default='INFO')
|
|
|
72 parser.add_argument('--configfile', type=str, default=os.path.join(
|
|
|
73 os.path.dirname(os.path.realpath(__file__)), 'build_miniped.yaml'),
|
|
|
74 help='config file (yaml) with diagnosis dictionary')
|
|
|
75 parser.add_argument('-H', '--host', type=str, help='omero hostname')
|
|
|
76 parser.add_argument('-U', '--user', type=str, help='omero user')
|
|
|
77 parser.add_argument('-P', '--passwd', type=str, help='omero password')
|
|
|
78 parser.add_argument('-S', '--study', type=str, required=True,
|
|
|
79 help="a list of comma separated studies used to "
|
|
|
80 "retrieve individuals that will be written to "
|
|
|
81 "ped file")
|
|
|
82 parser.add_argument('--ofile', type=str, help='output file path',
|
|
|
83 required=True)
|
|
|
84 parser.add_argument('--write_header', action='store_true', default=False,
|
|
|
85 help='Write header into the output file')
|
|
|
86 return parser
|
|
|
87
|
|
|
88
|
|
|
89 def build_families(individuals, logger):
|
|
|
90 # Individuals with only one parent will be considered like founders
|
|
|
91 # for i in individuals:
|
|
|
92 # if ((i.mother is None) or (i.father is None)):
|
|
|
93 # i.mother = None
|
|
|
94 # i.father = None
|
|
|
95 logger.info("individuals: %d" % len(individuals))
|
|
|
96 # logger.info("individuals: with 0 or 2 parents: %d" % len(not_one_parent))
|
|
|
97 logger.info("analyzing pedigree")
|
|
|
98 founders, non_founders, dangling, couples, children = ped.analyze(
|
|
|
99 individuals
|
|
|
100 )
|
|
|
101 logger.info("splitting into families")
|
|
|
102 return ped.split_disjoint(individuals, children)
|
|
|
103
|
|
|
104
|
|
|
105 def main(argv):
|
|
|
106 parser = make_parser()
|
|
|
107 args = parser.parse_args(argv)
|
|
|
108
|
|
|
109 logger = get_logger('build_miniped', level=args.loglevel,
|
|
|
110 filename=args.logfile)
|
|
|
111
|
|
|
112 dobj = Diagnosis(logger, args.configfile)
|
|
|
113 logger.debug("l {}".format(dobj.get_diagnosis_label()))
|
|
|
114
|
|
|
115 try:
|
|
|
116 host = args.host or vlu.ome_host()
|
|
|
117 user = args.user or vlu.ome_user()
|
|
|
118 passwd = args.passwd or vlu.ome_passwd()
|
|
|
119 except ValueError, ve:
|
|
|
120 logger.critical(ve)
|
|
|
121 sys.exit(ve)
|
|
|
122
|
|
|
123 kb = KB(driver='omero')(host, user, passwd)
|
|
|
124 logger.debug('Loading all individuals from omero')
|
|
|
125 all_inds = kb.get_objects(kb.Individual) # store all inds to cache
|
|
|
126 logger.debug('%d individuals loaded' % len(all_inds))
|
|
|
127 studies = [kb.get_study(s) for s in args.study.split(',')]
|
|
|
128 # Removing None values
|
|
|
129 studies = set(studies)
|
|
|
130 try:
|
|
|
131 studies.remove(None)
|
|
|
132 except KeyError:
|
|
|
133 pass
|
|
|
134 studies = list(studies)
|
|
|
135 # Sorting studies
|
|
|
136 studies = sorted(studies, key=lambda k: k.label.lower())
|
|
|
137 if len(studies) == 0:
|
|
|
138 logger.error(
|
|
|
139 'No matches found for labels %s, stopping program' % args.study)
|
|
|
140 sys.exit(2)
|
|
|
141 enrolled_map = {}
|
|
|
142 for study in studies:
|
|
|
143 logger.info('Loading enrolled individuals for study %s' % study.label)
|
|
|
144 enrolled = kb.get_enrolled(study)
|
|
|
145 logger.debug('%d individuals loaded' % len(enrolled))
|
|
|
146 for en in enrolled:
|
|
|
147 if en.individual.id not in enrolled_map:
|
|
|
148 enrolled_map[en.individual.id] = (
|
|
|
149 '%s:%s' % (en.study.label, en.studyCode),
|
|
|
150 en.individual)
|
|
|
151 else:
|
|
|
152 logger.debug('Individual %s already mapped' % en.individual.id)
|
|
|
153 logger.debug('Loading EHR records')
|
|
|
154 ehr_records = kb.get_ehr_records()
|
|
|
155 logger.debug('%s EHR records loaded' % len(ehr_records))
|
|
|
156 ehr_records_map = {}
|
|
|
157 for r in ehr_records:
|
|
|
158 ehr_records_map.setdefault(r['i_id'], []).append(r)
|
|
|
159 affection_map = {}
|
|
|
160 arch, field = dobj.get_openehr_data()
|
|
|
161 for ind_id, ehr_recs in ehr_records_map.iteritems():
|
|
|
162 affection_map[ind_id] = dobj.get_unaffected_diagnosis_dictionary()
|
|
|
163 ehr = EHR(ehr_recs)
|
|
|
164 for k, v in dobj.get_diagnosis().iteritems():
|
|
|
165 for d in v:
|
|
|
166 if ehr.matches(arch, field, d):
|
|
|
167 affection_map[ind_id][k] = PLINK_AFFECTED
|
|
|
168
|
|
|
169 immuno_inds = [i for (ind_id, (st_code, i)) in enrolled_map.iteritems()]
|
|
|
170 families = build_families(immuno_inds, logger)
|
|
|
171 logger.info("found %d families" % len(families))
|
|
|
172
|
|
|
173 def resolve_label(i):
|
|
|
174 try:
|
|
|
175 return enrolled_map[i.id][0]
|
|
|
176 except KeyError:
|
|
|
177 return i.id
|
|
|
178
|
|
|
179 def resolve_pheno(i):
|
|
|
180 try:
|
|
|
181 immuno_affection = affection_map[i.id]
|
|
|
182 except KeyError:
|
|
|
183 return [(d, PLINK_MISSING) for d in dobj.get_diagnosis_label()]
|
|
|
184 return [(d, immuno_affection[d]) for d in dobj.get_diagnosis_label()]
|
|
|
185
|
|
|
186 kb.Gender.map_enums_values(kb)
|
|
|
187 gender_map = lambda x: 2 if x.enum_label() == kb.Gender.FEMALE.enum_label() \
|
|
|
188 else 1
|
|
|
189
|
|
|
190 for d in dobj.get_diagnosis_label():
|
|
|
191 FIELDS.append("_".join([d, "status"]))
|
|
|
192 with open(args.ofile, "w") as f:
|
|
|
193 writer = csv.DictWriter(f, FIELDS, delimiter="\t", lineterminator="\n")
|
|
|
194 if args.write_header:
|
|
|
195 writer.writeheader()
|
|
|
196 families_data = []
|
|
|
197 logger.info("building families data")
|
|
|
198 for k, fam in enumerate(families):
|
|
|
199 fam_label = "FAM_%d" % (k + 1)
|
|
|
200 for i in fam:
|
|
|
201 r = {"fam_label": fam_label,
|
|
|
202 "ind_label": resolve_label(i),
|
|
|
203 "fat_label": 0 if (i.father is None or i.father not in fam)
|
|
|
204 else resolve_label(i.father),
|
|
|
205 "mot_label": 0 if (i.mother is None or i.mother not in fam)
|
|
|
206 else resolve_label(i.mother),
|
|
|
207 "gender": gender_map(i.gender)}
|
|
|
208 for p in resolve_pheno(i):
|
|
|
209 r["_".join([p[0], "status"])] = p[1]
|
|
|
210 families_data.append(r)
|
|
|
211 logger.info("writing miniped")
|
|
|
212 writer.writerows(families_data)
|
|
|
213
|
|
|
214
|
|
|
215 if __name__ == "__main__":
|
|
|
216 main(sys.argv[1:])
|