Mercurial > repos > artbio > ngsplot
comparison bin/ngsplotdb.py @ 0:3ca58369469c draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/ngsplot commit b'e9fcc157a7f2f2fa9d6ac9a58d425ff17c975f5c\n'
author | artbio |
---|---|
date | Wed, 06 Dec 2017 19:01:53 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:3ca58369469c |
---|---|
1 #!/usr/bin/env python | |
2 # | |
3 # ngsplotdb - PYTHON script to manipulate the ngs.plot database installation. | |
4 # Author: Li Shen, Mount Sinai School of Medicine | |
5 # Date created: Dec 28, 2012 | |
6 # Last updated: May 28, 2013 | |
7 # | |
8 # Functions implemented: | |
9 # | |
10 # list - List locally installed genomes. | |
11 # check - Check the integrity of local database. | |
12 # install - Install a genome from ngsplotdb.XXX.tar.gz. | |
13 # remove - Remove a locally installed genome. | |
14 | |
15 | |
16 # def dir_to_idspec(dirn): | |
17 # """Convert an FTP directory name to species ID and Name. | |
18 # This works with both UCSC and Ensembl FTP. | |
19 # """ | |
20 # parts = dirn.split('_') | |
21 # sid = parts[0][:3].lower() + parts[1][:3].lower() | |
22 # spec = str.title(parts[0] + ' ' + parts[1]) | |
23 # return sid, spec | |
24 | |
25 # def listremote(database="refseq"): | |
26 # """List available genomes on a remote FTP site.""" | |
27 | |
28 # import urllib2 | |
29 | |
30 # url_ensembl = "ftp://ftp.ensembl.org/pub/current_gtf/" | |
31 # url_ucsc = "ftp://hgdownload.cse.ucsc.edu/goldenPath/currentGenomes/" | |
32 | |
33 # if database == "refseq": | |
34 # ftp = urllib2.urlopen(url_ucsc) | |
35 # elif database == "ensembl": | |
36 # ftp = urllib2.urlopen(url_ensembl) | |
37 # else: | |
38 # pass | |
39 | |
40 # # Read directory names from the remote FTP site. | |
41 # dir_names = ftp.read().splitlines() | |
42 # # Parse lines to obtain clean directory names only. | |
43 # if database == "refseq": | |
44 # dir_names = map(lambda x: x.split()[-3], dir_names) | |
45 # elif database == "ensembl": | |
46 # dir_names = map(lambda x: x.split()[-1], dir_names) | |
47 # else: | |
48 # pass | |
49 # # Filter directory names that do not correspond to a species. | |
50 # # On UCSC FTP, both "." and ".." will be retrieved from above and can | |
51 # # cause troubles. | |
52 | |
53 # id_spec_list = map(dir_to_idspec, dir_names) | |
54 # print "ID\tName" | |
55 # for r in id_spec_list: | |
56 # print r[0], "\t", r[1] | |
57 | |
58 def read_gnlist(root_path, mode): | |
59 """Read installed genomes list. | |
60 | |
61 Args: | |
62 root_path: ngs.plot installation directory. | |
63 mode: row reading mode - "vector" or "hash", correspond to each record | |
64 read as a vector of hash table. | |
65 Returns: (header split vector, hash table of installed genomes, | |
66 vector of column widths) | |
67 """ | |
68 from collections import defaultdict | |
69 | |
70 gn_list = root_path + "/database/gn_list.txt" | |
71 try: | |
72 db_f = open(gn_list, "r") | |
73 except IOError: | |
74 print "Open {0} error: your ngs.plot database may be corrupted.".\ | |
75 format(gn_list) | |
76 sys.exit() | |
77 | |
78 default_list = root_path + "/database/default.tbl" | |
79 try: | |
80 default_f = open(default_list, "r") | |
81 except IOError: | |
82 print "Open {0} error: your ngs.plot database may be corrupted.".\ | |
83 format(default_list) | |
84 sys.exit() | |
85 default_l = [] | |
86 header = default_f.readline() # skip the header | |
87 for rec in default_f: | |
88 r_sp = rec.rstrip().split("\t") | |
89 default_l.append((r_sp[0], r_sp[2])) # tuple of genome and region, like ("dm3", "genebody") | |
90 genome_region_dict = defaultdict(list) | |
91 for genome,region in default_l: | |
92 genome_region_dict[genome].append(region) | |
93 | |
94 header = db_f.readline() | |
95 h_sp = header.rstrip().split("\t") | |
96 h_sp.append("InstalledFeatures") | |
97 v_cw = map(len, h_sp) # column widths initialize to header widths. | |
98 | |
99 g_tbl = {} | |
100 for rec in db_f: | |
101 r_sp = rec.rstrip().split("\t") | |
102 r_sp.append(",".join(genome_region_dict[r_sp[0]])) | |
103 r_cw = map(len, r_sp) | |
104 v_cw = map(max, v_cw, r_cw) | |
105 | |
106 r_tbl = dict(zip(h_sp, r_sp)) | |
107 if(mode == "vector"): | |
108 g_tbl[r_tbl["ID"]] = r_sp | |
109 elif(mode == "hash"): | |
110 g_tbl[r_tbl["ID"]] = r_tbl | |
111 else: | |
112 pass | |
113 | |
114 db_f.close() | |
115 | |
116 return (h_sp, g_tbl, v_cw) | |
117 | |
118 | |
119 def update_gnlist(root_path, g_tbl, gn, assembly, species, ens_v, np_v): | |
120 """Update/Add genomes in ngs.plot database. | |
121 | |
122 Args: | |
123 root_path: ngs.plot installation directory. | |
124 g_tbl: genome hash table. Each value is also a hash table of | |
125 genome info. | |
126 gn: genome name. | |
127 assembly: genome assembly name. | |
128 species: species name. | |
129 ens_v: new Ensembl version. | |
130 np_v: new ngs.plot version. | |
131 Returns: None | |
132 """ | |
133 | |
134 if gn in g_tbl: | |
135 g_tbl[gn]["EnsVer"] = ens_v | |
136 g_tbl[gn]["NPVer"] = np_v | |
137 else: | |
138 g_tbl[gn] = {"ID": gn, "Assembly": assembly, "Species": species, \ | |
139 "EnsVer": ens_v, "NPVer": np_v} | |
140 | |
141 write_gnlist(root_path, g_tbl) | |
142 | |
143 | |
144 def write_gnlist(root_path, g_tbl): | |
145 """Write genome hash table to database file: gn_list.txt. | |
146 | |
147 Args: | |
148 root_path: ngs.plot installation directory. | |
149 g_tbl: genome hash table. Each value is also a hash table of | |
150 genome info. | |
151 Returns: None | |
152 """ | |
153 | |
154 output_order = ["ID", "Assembly", "Species", "EnsVer", "NPVer"] | |
155 gn_list = root_path + "/database/gn_list.txt" | |
156 | |
157 try: | |
158 db_f = open(gn_list, "w") | |
159 except IOError: | |
160 print "Open {0} error: your ngs.plot database may be corrupted.".\ | |
161 format(gn_list) | |
162 sys.exit() | |
163 | |
164 db_f.write("\t".join(output_order) + "\n") | |
165 | |
166 for k in g_tbl.viewkeys(): | |
167 rec = [] | |
168 for col in output_order: | |
169 rec.append(str(g_tbl[k][col])) | |
170 db_f.write("\t".join(rec) + "\n") | |
171 | |
172 db_f.close() | |
173 | |
174 | |
175 def listgn(root_path, args): | |
176 """List installed genomes in local database on screen. | |
177 | |
178 Args: | |
179 root_path: ngs.plot installation directory. | |
180 args: currently not used. | |
181 Returns: None. | |
182 """ | |
183 | |
184 import math | |
185 | |
186 (h_sp, g_tbl, v_cw) = read_gnlist(root_path, "vector") | |
187 | |
188 # Format column widths to beautify output. | |
189 tab_u = 4 | |
190 v_cw = map(lambda x: int(math.ceil(float(x) / tab_u) * tab_u + 1), v_cw) | |
191 | |
192 # List genomes to screen. | |
193 print "".join(map(lambda x, y: x.ljust(y), h_sp, v_cw)) # header. | |
194 for k in sorted(g_tbl.viewkeys(), key=str.lower): | |
195 print "".join(map(lambda x, y: x.ljust(y), g_tbl[k], v_cw)) | |
196 | |
197 def isExAnno(pkg_files): | |
198 """If the package is an extension package based on cell lines for ngs.plot, | |
199 like enhancer or dhs. | |
200 | |
201 Args: | |
202 pkg_files: list of files in the package. | |
203 Returns: | |
204 isEx: Bool, if the package is an extension package. | |
205 feature: string, feature name contained in the extension package. None | |
206 if not an extension package. | |
207 RDcount: number of RData tables in the package. | |
208 """ | |
209 | |
210 import os.path | |
211 | |
212 exclusive_files = [".chrnames.refseq", ".chrnames.ensembl", ".metainfo"] | |
213 isEx = False | |
214 RDcount = 0 | |
215 feature = None | |
216 for file_name in pkg_files: | |
217 if os.path.basename(file_name) in exclusive_files: | |
218 continue | |
219 if (not file_name.endswith("RData")) and (file_name.count("/")) > 0: | |
220 isEx = True | |
221 feature = file_name.split("/")[1] | |
222 elif file_name.endswith("RData"): | |
223 RDcount += 1 | |
224 return (isEx, feature, RDcount) | |
225 | |
226 def install(root_path, args): | |
227 """Interactive session for installing genome from package file. | |
228 | |
229 Args: | |
230 root_path: ngs.plot installation directory. | |
231 args.yes: say yes to all questions. | |
232 args.pkg: path of package file. | |
233 Returns: | |
234 None. | |
235 """ | |
236 | |
237 import tarfile | |
238 import sys | |
239 | |
240 yestoall = args.yes | |
241 pkg_file = args.pkg | |
242 # Package name: e.g. ngsplotdb_mm10_71_1.0.tar.gz. | |
243 # Information about the genome is in mm10/.metainfo. | |
244 try: | |
245 pkg_f = tarfile.open(pkg_file, "r:gz") | |
246 except tarfile.ReadError: | |
247 print "Read package file {0} error.".format(pkg_file), | |
248 print "The downloaded file may be corrupted." | |
249 sys.exit() | |
250 | |
251 print "Extracting information from package...", | |
252 sys.stdout.flush() | |
253 pkg_files = pkg_f.getnames() | |
254 g_folder = pkg_files[0] | |
255 # Minus folder name and .metainfo file name. | |
256 (isEx, feature, RDcount) = isExAnno(pkg_files) | |
257 if isEx: | |
258 print feature + " extension package, ", | |
259 print "contains " + str(RDcount + 2) + " tables." | |
260 else: | |
261 print "contains " + str(RDcount + 2) + " tables." | |
262 # .metainfo file: | |
263 # "ID"<TAB>mm10 | |
264 # "Assembly"<TAB>GRCm38 | |
265 # "Species"<TAB>mus_musculus | |
266 # "EnsVer"<TAB>71 | |
267 # "NPVer"<TAB>1.0 | |
268 meta_f = pkg_f.extractfile(g_folder + "/.metainfo") | |
269 meta_tbl = {} | |
270 if meta_f: | |
271 for rec in meta_f: | |
272 rec_sp = rec.strip().split("\t") | |
273 meta_tbl[rec_sp[0]] = rec_sp[1] # format: Variable<TAB>Value | |
274 else: | |
275 print "No meta information found in the package file.", | |
276 print "The downloaded file may be corrupted." | |
277 sys.exit() | |
278 meta_f.close() | |
279 pkg_f.close() | |
280 | |
281 gn_inst = meta_tbl["ID"] | |
282 assembly = meta_tbl["Assembly"] | |
283 species = meta_tbl["Species"] | |
284 ens_ver = float(meta_tbl["EnsVer"]) | |
285 np_ver = float(meta_tbl["NPVer"]) | |
286 | |
287 # Database file. | |
288 (h_sp, g_tbl, v_cw) = read_gnlist(root_path, "hash") | |
289 | |
290 if gn_inst in g_tbl: | |
291 installed_ens = float(g_tbl[gn_inst]["EnsVer"]) | |
292 installed_np = float(g_tbl[gn_inst]["NPVer"]) | |
293 | |
294 # For extension package. | |
295 # Only the same version with basic package could be installed. | |
296 if isEx: | |
297 if ens_ver == installed_ens and np_ver == installed_np: | |
298 print "Will upgrade the genome {0} with {1} annotation:".format(gn_inst, \ | |
299 feature), | |
300 print "Ensembl: {0}; ngs.plot: {1}.".\ | |
301 format(installed_ens, np_ver) | |
302 if yestoall: | |
303 install_pkg(root_path, pkg_file, gn_inst) | |
304 update_gnlist(root_path, g_tbl, gn_inst, assembly, \ | |
305 species, ens_ver, np_ver) | |
306 else: | |
307 ans = raw_input("Continue?(y/n): ") | |
308 while True: | |
309 if(ans == "y" or ans == "Y" or ans == "n" or ans == "N"): | |
310 break | |
311 else: | |
312 ans = raw_input("Answer must be y/Y or n/N: ") | |
313 if(ans == "y" or ans == "Y"): | |
314 install_pkg(root_path, pkg_file, gn_inst) | |
315 update_gnlist(root_path, g_tbl, gn_inst, assembly, \ | |
316 species, ens_ver, np_ver) | |
317 return | |
318 else: | |
319 print "This is an extension package of " + feature + \ | |
320 ", ENS version " + str(ens_ver) + ", NPVer" + str(np_ver) + \ | |
321 ", please install the same version basic genome annotation first!" | |
322 return | |
323 | |
324 # For basic package. | |
325 # Install a newer version. | |
326 if ens_ver > installed_ens or \ | |
327 ens_ver == installed_ens and np_ver > installed_np: | |
328 print "Will upgrade the genome {0}:".format(gn_inst) | |
329 print "Ensembl: {0}==>{1}; ngs.plot: {2}==>{3}.".\ | |
330 format(installed_ens, ens_ver, installed_np, np_ver), | |
331 if yestoall: | |
332 install_pkg(root_path, pkg_file, gn_inst, gn_inst) | |
333 update_gnlist(root_path, g_tbl, gn_inst, assembly, \ | |
334 species, ens_ver, np_ver) | |
335 else: | |
336 ans = raw_input("Continue?(y/n): ") | |
337 while True: | |
338 if(ans == "y" or ans == "Y" or ans == "n" or ans == "N"): | |
339 break | |
340 else: | |
341 ans = raw_input("Answer must be y/Y or n/N: ") | |
342 if(ans == "y" or ans == "Y"): | |
343 install_pkg(root_path, pkg_file, gn_inst, gn_inst) | |
344 update_gnlist(root_path, g_tbl, gn_inst, assembly, \ | |
345 species, ens_ver, np_ver) | |
346 # Install an older version. | |
347 else: | |
348 print "Will install the same or older version", | |
349 print "of the genome {0}:".format(gn_inst) | |
350 print "Ensembl: {0}==>{1}; ngs.plot: {2}==>{3}.".\ | |
351 format(installed_ens, ens_ver, installed_np, np_ver), | |
352 if yestoall: | |
353 install_pkg(root_path, pkg_file, gn_inst, gn_inst) | |
354 update_gnlist(root_path, g_tbl, gn_inst, assembly, \ | |
355 species, ens_ver, np_ver) | |
356 else: | |
357 ans = raw_input("Continue?(y/n): ") | |
358 while True: | |
359 if(ans == "y" or ans == "Y" or ans == "n" or ans == "N"): | |
360 break | |
361 else: | |
362 ans = raw_input("Answer must be y/Y or n/N: ") | |
363 if(ans == "y" or ans == "Y"): | |
364 install_pkg(root_path, pkg_file, gn_inst, gn_inst) | |
365 update_gnlist(root_path, g_tbl, gn_inst, assembly, \ | |
366 species, ens_ver, np_ver) | |
367 # Totally new installation, only basic package could be installed. | |
368 else: | |
369 print "Will install new genome {0}:".format(gn_inst), | |
370 print "Ensembl=> v{0}; ngs.plot=> v{1}.".format(ens_ver, np_ver), | |
371 if yestoall: | |
372 install_pkg(root_path, pkg_file, gn_inst) | |
373 update_gnlist(root_path, g_tbl, gn_inst, assembly, \ | |
374 species, ens_ver, np_ver) | |
375 else: | |
376 ans = raw_input("Continue?(y/n): ") | |
377 | |
378 while True: | |
379 if(ans == "y" or ans == "Y" or ans == "n" or ans == "N"): | |
380 break | |
381 else: | |
382 ans = raw_input("Answer must be y/Y or n/N:") | |
383 if(ans == "y" or ans == "Y"): | |
384 install_pkg(root_path, pkg_file, gn_inst) | |
385 update_gnlist(root_path, g_tbl, gn_inst, assembly, \ | |
386 species, ens_ver, np_ver) | |
387 | |
388 | |
389 def install_pkg(root_path, pkg_file, new_gn, old_gn=None): | |
390 """Install genome from package file. | |
391 | |
392 Args: | |
393 root_path: ngs.plot installation directory. | |
394 pkg_file: package file name. | |
395 old_gn: old genome name to be removed first. | |
396 Returns: | |
397 None. | |
398 """ | |
399 | |
400 import shutil | |
401 import tarfile | |
402 import sys | |
403 | |
404 if old_gn: | |
405 print "Removing installed genome:{0}...".format(old_gn), | |
406 sys.stdout.flush() | |
407 shutil.rmtree(root_path + '/database/' + old_gn) | |
408 rm_dbtbl(root_path, old_gn) | |
409 print "Done" | |
410 | |
411 print "Installing new genome...", | |
412 sys.stdout.flush() | |
413 try: | |
414 tar_f = tarfile.open(pkg_file, "r:gz") | |
415 except tarfile.ReadError: | |
416 print "Read package file {0} error: ".format(pkg_file), | |
417 print "downloaded file may be corrupted." | |
418 sys.exit() | |
419 | |
420 try: | |
421 tar_f.extractall(root_path + "/database") | |
422 except tarfile.ExtractError: | |
423 print "Extract files from package error.", | |
424 print "The downloaded file may be corrupted." | |
425 | |
426 add_dbtbl(root_path, new_gn) | |
427 | |
428 print "Done" | |
429 | |
430 | |
431 def add_dbtbl(root_path, gn): | |
432 """Add a new genome into database meta tables. | |
433 | |
434 Args: | |
435 root_path: ngs.plot installation directory. | |
436 gn: genome name to add. | |
437 Returns: None. | |
438 """ | |
439 | |
440 import os.path | |
441 import os | |
442 import glob | |
443 import tempfile | |
444 import subprocess | |
445 | |
446 (dblist_f, dblist_n) = tempfile.mkstemp(text=True) | |
447 (gnlist_f, gnlist_n) = tempfile.mkstemp(text=True) | |
448 | |
449 # Obtain a list of .RData file names to extract info from. | |
450 all_rdata = root_path + '/database/{0}/{1}*.RData'.format(gn, gn) | |
451 all_set = set(map(os.path.basename, glob.glob(all_rdata))) | |
452 all_rdata = root_path + '/database/{0}/*/{1}*.RData'.format(gn, gn) | |
453 all_set = all_set | set(map(os.path.basename, glob.glob(all_rdata))) | |
454 exm_rdata = root_path + '/database/{0}/{1}*exonmodel*.RData'.format(gn, gn) | |
455 exm_set = set(map(os.path.basename, glob.glob(exm_rdata))) | |
456 nex_list = sorted(list(all_set - exm_set)) | |
457 | |
458 # DB file list. | |
459 gn_ens_list = {} # Ensembl genome name unique list. | |
460 desired_ncols = 6 | |
461 for fname in nex_list: | |
462 tokens = fname.split('.') | |
463 tokens = tokens[:6] | |
464 if tokens[-1] == 'RData': | |
465 tokens[-1] = 'NA' | |
466 tokens.extend(['NA'] * (desired_ncols - len(tokens))) | |
467 os.write(dblist_f, fname + "\t" + "\t".join(tokens) + "\n") | |
468 if tokens[1] == 'ensembl': | |
469 gn_ens_list[".".join(tokens[:3])] = 1 | |
470 os.close(dblist_f) | |
471 | |
472 # Ensembl genome name list. | |
473 for gn in sorted(gn_ens_list.viewkeys()): | |
474 os.write(gnlist_f, gn.replace(".", "\t") + "\n") | |
475 os.close(gnlist_f) | |
476 | |
477 # Call external program to create table default file. | |
478 (default_f, default_n) = tempfile.mkstemp(text=True) | |
479 subprocess.call(["setTableDefaults.py", gnlist_n, default_n]) | |
480 os.remove(gnlist_n) | |
481 | |
482 # Call external program to calcualte DB score and install the new entries | |
483 # into ngs.plot database. | |
484 subprocess.call(["install.db.tables.r", default_n, dblist_n]) | |
485 os.remove(default_n) | |
486 os.remove(dblist_n) | |
487 | |
488 | |
489 def remove(root_path, args): | |
490 """Remove genome from ngs.plot database. | |
491 | |
492 Args: | |
493 root_path: ngs.plot installation directory. | |
494 args.yes: say yes to all questions. | |
495 args.gn: genome name. | |
496 Returns: | |
497 None. | |
498 """ | |
499 | |
500 import shutil | |
501 import sys | |
502 | |
503 yestoall = args.yes | |
504 sub_ftr = args.ftr | |
505 | |
506 (h_sp, g_tbl, v_cw) = read_gnlist(root_path, "hash") | |
507 | |
508 gn = args.gn | |
509 | |
510 if gn in g_tbl and sub_ftr is None: | |
511 print "Will remove genome {0} from database.".format(gn), | |
512 do_rm = False | |
513 if yestoall: | |
514 do_rm = True | |
515 else: | |
516 ans = raw_input("Continue?(y/n): ") | |
517 while True: | |
518 if ans == 'y' or ans == 'Y' or ans == 'n' or ans == 'N': | |
519 break | |
520 else: | |
521 ans = raw_input("The answer must be y/Y or n/N: ") | |
522 if ans == 'y' or ans == 'Y': | |
523 do_rm = True | |
524 if do_rm: | |
525 folder_to_rm = root_path + "/database/" + gn | |
526 print "Removing genome...", | |
527 sys.stdout.flush() | |
528 shutil.rmtree(folder_to_rm) | |
529 del g_tbl[gn] | |
530 write_gnlist(root_path, g_tbl) | |
531 rm_dbtbl(root_path, gn) | |
532 print "Done" | |
533 elif gn in g_tbl and sub_ftr is not None: | |
534 print "Will remove genomic feature {0} of genome {1} from database."\ | |
535 .format(sub_ftr, gn) | |
536 do_rm = False | |
537 if yestoall: | |
538 do_rm = True | |
539 else: | |
540 ans = raw_input("Continue?(y/n): ") | |
541 while True: | |
542 if ans == 'y' or ans == 'Y' or ans == 'n' or ans == 'N': | |
543 break | |
544 else: | |
545 ans = raw_input("The answer must be y/Y or n/N: ") | |
546 if ans == 'y' or ans == 'Y': | |
547 do_rm = True | |
548 if do_rm: | |
549 folder_to_rm = root_path + "/database/" + gn + "/" + sub_ftr | |
550 print "Removing genome...", | |
551 sys.stdout.flush() | |
552 shutil.rmtree(folder_to_rm) | |
553 # g_tbl does't need to be updated | |
554 rm_dbtbl(root_path, gn, sub_ftr) | |
555 print "Done" | |
556 else: | |
557 print "Cannot find the genome in database. Nothing was done." | |
558 | |
559 | |
560 def rm_dbtbl(root_path, gn, sub_ftr=None): | |
561 """Remove a genome from database meta tables. | |
562 | |
563 Args: | |
564 root_path: ngs.plot installation directory. | |
565 gn: genome name to remove. | |
566 Returns: None. | |
567 """ | |
568 | |
569 import subprocess | |
570 | |
571 if sub_ftr is None: | |
572 subprocess.call(["remove.db.tables.r", gn]) | |
573 else: | |
574 subprocess.call(["remove.db.tables.r", gn, sub_ftr]) | |
575 | |
576 def chrnames(root_path, args): | |
577 """List chromosome names for a given genome. | |
578 | |
579 Args: | |
580 root_path: ngs.plot installation directory. | |
581 args.gn: genome name to remove. | |
582 args.db: database(ensembl or refseq). | |
583 Returns: None. | |
584 """ | |
585 | |
586 import sys | |
587 | |
588 db = args.db | |
589 gn = args.gn | |
590 | |
591 if db != "ensembl" and db != "refseq": | |
592 print "Unrecognized database name: database must be ensembl or refseq." | |
593 sys.exit() | |
594 | |
595 chrnames_file = root_path + "/database/" + gn + "/.chrnames." + db | |
596 | |
597 try: | |
598 chrf = open(chrnames_file) | |
599 except IOError: | |
600 print "Open file: {0} error.".format(chrnames_file), | |
601 print "Your database may be corrupted or you have an older version." | |
602 sys.exit() | |
603 | |
604 chr_list = chrf.read(1000000) # set 1MB limit to avoid nasty input. | |
605 chrf.close() | |
606 | |
607 chr_list = chr_list.strip() | |
608 print chr_list | |
609 | |
610 | |
611 | |
612 ############################################### | |
613 ######## Main procedure. ###################### | |
614 if __name__ == "__main__": | |
615 | |
616 import argparse | |
617 import os | |
618 import sys | |
619 | |
620 try: | |
621 root_path = os.environ["NGSPLOT"] | |
622 except KeyError: | |
623 print "Environmental variable NGSPLOT is not set! Exit.\n" | |
624 sys.exit() | |
625 | |
626 # Main parser. | |
627 parser = argparse.ArgumentParser(description="Manipulate ngs.plot's \ | |
628 annotation database", | |
629 prog="ngsplotdb.py") | |
630 parser.add_argument("-y", "--yes", help="Say yes to all prompted questions", | |
631 action="store_true") | |
632 | |
633 subparsers = parser.add_subparsers(title="Subcommands", | |
634 help="additional help") | |
635 | |
636 # ngsplotdb.py list parser. | |
637 parser_list = subparsers.add_parser("list", help="List genomes installed \ | |
638 in database") | |
639 parser_list.set_defaults(func=listgn) | |
640 | |
641 # ngsplotdb.py install parser. | |
642 parser_install = subparsers.add_parser("install", | |
643 help="Install genome from package \ | |
644 file") | |
645 parser_install.add_argument("pkg", help="Package file(.tar.gz) to install", | |
646 type=str) | |
647 parser_install.set_defaults(func=install) | |
648 | |
649 # ngsplotdb.py remove parser. | |
650 parser_remove = subparsers.add_parser("remove", | |
651 help="Remove genome from database") | |
652 parser_remove.add_argument("gn", help="Name of genome to be \ | |
653 removed(e.g. hg18)", type=str) | |
654 parser_remove.add_argument("--ftr", help="Remove cellline specific features \ | |
655 (enhancer, dhs, etc)", type=str,\ | |
656 default=None) | |
657 parser_remove.set_defaults(func=remove) | |
658 | |
659 # ngsplotdb.py chromosome names parser. | |
660 parser_chrnames = subparsers.add_parser("chrnames", | |
661 help="List chromosome names") | |
662 parser_chrnames.add_argument("gn", help="Genome name(e.g. mm9)", type=str) | |
663 parser_chrnames.add_argument("db", help="Database(ensembl or refseq)", | |
664 type=str) | |
665 parser_chrnames.set_defaults(func=chrnames) | |
666 | |
667 | |
668 args = parser.parse_args() | |
669 args.func(root_path, args) | |
670 |