|
3
|
1 # BEGIN_COPYRIGHT
|
|
|
2 # END_COPYRIGHT
|
|
|
3
|
|
|
4 """
|
|
|
5 A rough example of basic pedigree info generation.
|
|
|
6 """
|
|
|
7
|
|
|
8 import csv, argparse, sys, os
|
|
|
9
|
|
|
10 from bl.vl.kb import KnowledgeBase as KB
|
|
|
11 from bl.vl.kb.drivers.omero.ehr import EHR
|
|
|
12 import bl.vl.individual.pedigree as ped
|
|
|
13 import bl.vl.utils.ome_utils as vlu
|
|
|
14 from bl.vl.utils import LOG_LEVELS, get_logger
|
|
|
15
|
|
|
16 DIAGNOSIS_ARCH = 'openEHR-EHR-EVALUATION.problem-diagnosis.v1'
|
|
|
17 DIAGNOSIS_FIELD = 'at0002.1'
|
|
|
18 T1D_ICD10 = 'icd10-cm:E10'
|
|
|
19 MS_ICD10 = 'icd10-cm:G35'
|
|
|
20 NEFRO_ICD10 = 'icd10-cm:E23.2'
|
|
|
21
|
|
|
22 PLINK_MISSING = -9
|
|
|
23 PLINK_UNAFFECTED = 1
|
|
|
24 PLINK_AFFECTED = 2
|
|
|
25
|
|
|
26 FIELDS = ["fam_label", "ind_label", "fat_label", "mot_label", "gender", "t1d_status", "ms_status", "nefro_status"]
|
|
|
27
|
|
|
28
|
|
|
29 def make_parser():
|
|
|
30 parser = argparse.ArgumentParser(description='build the first columns of a ped file from VL')
|
|
|
31 parser.add_argument('--logfile', type=str, help='log file (default=stderr)')
|
|
|
32 parser.add_argument('--loglevel', type=str, choices = LOG_LEVELS,
|
|
|
33 help='logging level', default='INFO')
|
|
|
34 parser.add_argument('-H', '--host', type=str, help='omero hostname')
|
|
|
35 parser.add_argument('-U', '--user', type=str, help='omero user')
|
|
|
36 parser.add_argument('-P', '--passwd', type=str, help='omero password')
|
|
|
37 parser.add_argument('-S', '--study', type=str, required=True,
|
|
|
38 help='a list of comma separated studies used to retrieve individuals that will be written to ped file')
|
|
|
39 parser.add_argument('--ofile', type=str, help='output file path',
|
|
|
40 required=True)
|
|
|
41 return parser
|
|
|
42
|
|
|
43 def build_families(individuals, logger):
|
|
|
44 # Individuals with only one parent will be considered like founders
|
|
|
45 # for i in individuals:
|
|
|
46 # if ((i.mother is None) or (i.father is None)):
|
|
|
47 # i.mother = None
|
|
|
48 # i.father = None
|
|
|
49 logger.info("individuals: %d" % len(individuals))
|
|
|
50 #logger.info("individuals: with 0 or 2 parents: %d" % len(not_one_parent))
|
|
|
51 logger.info("analyzing pedigree")
|
|
|
52 founders, non_founders, dangling, couples, children = ped.analyze(
|
|
|
53 individuals
|
|
|
54 )
|
|
|
55 logger.info("splitting into families")
|
|
|
56 return ped.split_disjoint(individuals, children)
|
|
|
57
|
|
|
58
|
|
|
59 def main(argv):
|
|
|
60 parser = make_parser()
|
|
|
61 args = parser.parse_args(argv)
|
|
|
62
|
|
|
63 logger = get_logger('build_miniped', level=args.loglevel,
|
|
|
64 filename=args.logfile)
|
|
|
65
|
|
|
66 try:
|
|
|
67 host = args.host or vlu.ome_host()
|
|
|
68 user = args.user or vlu.ome_user()
|
|
|
69 passwd = args.passwd or vlu.ome_passwd()
|
|
|
70 except ValueError, ve:
|
|
|
71 logger.critical(ve)
|
|
|
72 sys.exit(ve)
|
|
|
73
|
|
|
74 kb = KB(driver='omero')(host, user, passwd)
|
|
|
75 logger.debug('Loading all individuals from omero')
|
|
|
76 all_inds = kb.get_objects(kb.Individual) # store all inds to cache
|
|
|
77 logger.debug('%d individuals loaded' % len(all_inds))
|
|
|
78 studies = [kb.get_study(s) for s in args.study.split(',')]
|
|
|
79 # Removing None values
|
|
|
80 studies = set(studies)
|
|
|
81 try:
|
|
|
82 studies.remove(None)
|
|
|
83 except KeyError:
|
|
|
84 pass
|
|
|
85 studies = list(studies)
|
|
|
86 if len(studies) == 0:
|
|
|
87 logger.error('No matches found for labels %s, stopping program' % args.study)
|
|
|
88 sys.exit(2)
|
|
|
89 enrolled_map = {}
|
|
|
90 for study in studies:
|
|
|
91 logger.info('Loading enrolled individuals for study %s' % study.label)
|
|
|
92 enrolled = kb.get_enrolled(study)
|
|
|
93 logger.debug('%d individuals loaded' % len(enrolled))
|
|
|
94 for en in enrolled:
|
|
|
95 if en.individual.id not in enrolled_map:
|
|
|
96 enrolled_map[en.individual.id] = ('%s:%s' % (en.study.label, en.studyCode),
|
|
|
97 en.individual)
|
|
|
98 else:
|
|
|
99 logger.debug('Individual %s already mapped' % en.individual.id)
|
|
|
100 logger.debug('Loading EHR records')
|
|
|
101 ehr_records = kb.get_ehr_records()
|
|
|
102 logger.debug('%s EHR records loaded' % len(ehr_records))
|
|
|
103 ehr_records_map = {}
|
|
|
104 for r in ehr_records:
|
|
|
105 ehr_records_map.setdefault(r['i_id'], []).append(r)
|
|
|
106 affection_map = {}
|
|
|
107 for ind_id, ehr_recs in ehr_records_map.iteritems():
|
|
|
108 affection_map[ind_id] = dict(t1d=PLINK_UNAFFECTED, ms=PLINK_UNAFFECTED,
|
|
|
109 nefro=PLINK_UNAFFECTED)
|
|
|
110 ehr = EHR(ehr_recs)
|
|
|
111 if ehr.matches(DIAGNOSIS_ARCH, DIAGNOSIS_FIELD, T1D_ICD10):
|
|
|
112 affection_map[ind_id]['t1d'] = PLINK_AFFECTED
|
|
|
113 if ehr.matches(DIAGNOSIS_ARCH, DIAGNOSIS_FIELD, MS_ICD10):
|
|
|
114 affection_map[ind_id]['ms'] = PLINK_AFFECTED
|
|
|
115 if ehr.matches(DIAGNOSIS_ARCH, DIAGNOSIS_FIELD, NEFRO_ICD10):
|
|
|
116 affection_map[ind_id]['nefro'] = PLINK_AFFECTED
|
|
|
117
|
|
|
118 immuno_inds = [i for (ind_id, (st_code, i)) in enrolled_map.iteritems()]
|
|
|
119 families = build_families(immuno_inds, logger)
|
|
|
120 logger.info("found %d families" % len(families))
|
|
|
121
|
|
|
122 def resolve_label(i):
|
|
|
123 try:
|
|
|
124 return enrolled_map[i.id][0]
|
|
|
125 except KeyError:
|
|
|
126 return i.id
|
|
|
127
|
|
|
128 def resolve_pheno(i):
|
|
|
129 try:
|
|
|
130 immuno_affection = affection_map[i.id]
|
|
|
131 except KeyError:
|
|
|
132 return PLINK_MISSING, PLINK_MISSING, PLINK_MISSING
|
|
|
133 return immuno_affection["t1d"], immuno_affection["ms"], immuno_affection["nefro"]
|
|
|
134
|
|
|
135 kb.Gender.map_enums_values(kb)
|
|
|
136 gender_map = lambda x: 2 if x == kb.Gender.FEMALE else 1
|
|
|
137
|
|
|
138 logger.info("writing miniped")
|
|
|
139 with open(args.ofile, "w") as f:
|
|
|
140 writer = csv.DictWriter(f, FIELDS, delimiter="\t", lineterminator="\n")
|
|
|
141 for k, fam in enumerate(families):
|
|
|
142 fam_label = "FAM_%d" % (k+1)
|
|
|
143 for i in fam:
|
|
|
144 r = {}
|
|
|
145 r["fam_label"] = fam_label
|
|
|
146 r["ind_label"] = resolve_label(i)
|
|
|
147 r["fat_label"] = 0 if (i.father is None or i.father not in fam) else resolve_label(i.father)
|
|
|
148 r["mot_label"] = 0 if (i.mother is None or i.mother not in fam) else resolve_label(i.mother)
|
|
|
149 r["gender"] = gender_map(i.gender)
|
|
|
150 r["t1d_status"], r["ms_status"], r["nefro_status"] = resolve_pheno(i)
|
|
|
151 writer.writerow(r)
|
|
|
152
|
|
|
153
|
|
|
154 if __name__ == "__main__":
|
|
|
155 main(sys.argv[1:])
|