comparison galaxy-tools/biobank/tools/get_parents_from_sibling.py @ 0:ba6cf6ede027 draft default tip

Uploaded
author ric
date Wed, 28 Sep 2016 06:03:30 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:ba6cf6ede027
1 '''
2 From a file like this
3 individual sibling
4 V08E18411BC66F4987BCA43EFC6F636224 V0AE5660BF4A7149589BE9DB3308B50327
5 V0FAE2B10F690041509739A3F4B314DC8F V00875417B31684EC2A62EE37717913445
6 V0382EF862AA4B475697C95D3777043239 V08E376727ED8E4B369DAA3B62A9395E1B
7 ....
8
9 retrieve each individual's parents using sibling information and build a file like
10
11 individual father mother
12 V08E18411BC66F4987BCA43EFC6F636224 V027DE334753424F07B81A70053EF5B873 V035222CAEE0474AFEBB9A161D4B64914E
13 V0FAE2B10F690041509739A3F4B314DC8F V0E966B53BDCC942C09D6B6D96DE98F4F4 V0F7B6926C6FBE4F0BB38BBC6CFB13A825
14 ....
15
16 '''
17
18 import sys, csv, argparse, logging, os
19
20 from bl.vl.kb import KnowledgeBase as KB
21 import bl.vl.utils.ome_utils as vlu
22 from bl.vl.utils import get_logger, LOG_LEVELS
23
24
def make_parser():
    """Build the argparse parser for this command-line tool.

    Options, in registration order: --logfile, --loglevel, -H/--host,
    -U/--user, -P/--passwd, --in_file (required) and --out_file (required).

    Returns the configured argparse.ArgumentParser.
    """
    cli = argparse.ArgumentParser(
        description='retrieve parents information using sibling'
    )
    # (flags, keyword settings) pairs; the order here is the order shown
    # in the generated --help output.
    option_specs = [
        (('--logfile',),
         {'type': str, 'help': 'log file (default=stderr)'}),
        (('--loglevel',),
         {'type': str, 'choices': LOG_LEVELS,
          'help': 'logging level (default=INFO)', 'default': 'INFO'}),
        (('-H', '--host'),
         {'type': str, 'help': 'omero hostname'}),
        (('-U', '--user'),
         {'type': str, 'help': 'omero user'}),
        (('-P', '--passwd'),
         {'type': str, 'help': 'omero passwd'}),
        (('--in_file',),
         {'type': str, 'required': True,
          'help': 'input file with individual-sibling couples'}),
        (('--out_file',),
         {'type': str, 'required': True,
          'help': 'output file with parents information'}),
    ]
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)
    return cli
38
39
def main(argv):
    """Resolve each individual's parents through a known sibling.

    Reads tab-separated rows with 'individual' and 'sibling' columns from
    --in_file, looks each sibling id up among the Individual objects
    retrieved from the knowledge base, and writes a tab-separated file with
    'individual', 'father' and 'mother' columns to --out_file.

    A parent that is not set on the sibling is written as the string
    'None'. Rows whose sibling id is not among the retrieved individuals
    are logged as errors and left out of the output.

    argv: list of command-line arguments, without the program name.

    Exits through sys.exit if connection parameters cannot be resolved.
    Raises KeyError if the input file lacks a required column.
    """
    parser = make_parser()
    args = parser.parse_args(argv)

    logger = get_logger('get_parents_from_sibling', level=args.loglevel,
                        filename=args.logfile)

    # Command-line values take precedence; the vlu helpers are only
    # consulted for parameters that were not given.
    try:
        host = args.host or vlu.ome_host()
        user = args.user or vlu.ome_user()
        passwd = args.passwd or vlu.ome_passwd()
    except ValueError as ve:  # 'as' form is valid on Python 2.6+ and 3
        logger.critical(ve)
        sys.exit(ve)

    kb = KB(driver='omero')(host, user, passwd)

    logger.info('Retrieving individuals')
    inds = kb.get_objects(kb.Individual)
    logger.info('Retrieved %d individuals' % len(inds))
    # Index by id so every input row is a single dictionary lookup.
    inds_lookup = {ind.id: ind for ind in inds}

    with open(args.in_file) as infile:
        reader = csv.DictReader(infile, delimiter='\t')
        records = []
        for row in reader:
            # Read both columns outside the try block: a missing column
            # must surface as a KeyError naming that column rather than be
            # reported as an unknown individual.
            individual_id = row['individual']
            sibling_id = row['sibling']
            try:
                sib = inds_lookup[sibling_id]
            except KeyError:
                logger.error('Unable to find individual %s' % sibling_id)
                continue
            rec = {'individual': individual_id,
                   'father': sib.father.id if sib.father else 'None',
                   'mother': sib.mother.id if sib.mother else 'None'}
            logger.info('Individual %s, father: %s - mother: %s' % (individual_id,
                                                                    rec['father'],
                                                                    rec['mother']))
            records.append(rec)

    logger.info('Retrieved parents for %d individuals' % len(records))

    with open(args.out_file, 'w') as outfile:
        writer = csv.DictWriter(outfile, ['individual', 'father', 'mother'],
                                delimiter='\t')
        writer.writeheader()
        writer.writerows(records)
87
if __name__ == '__main__':
    # Script entry point: pass the command-line arguments, minus the
    # program name, to main().
    cli_args = sys.argv[1:]
    main(cli_args)