Mercurial > repos > sblanck > mpagenomics_normalize
comparison preprocess.py @ 1:4d25dec9707e
correction
author | blanck |
---|---|
date | Tue, 28 Apr 2015 11:23:47 +0200 |
parents | a89bae08bf2d |
children | 54d549210759 |
comparison
equal
deleted
inserted
replaced
0:a89bae08bf2d | 1:4d25dec9707e |
---|---|
2 import re | 2 import re |
3 import shutil | 3 import shutil |
4 import sys | 4 import sys |
5 import subprocess | 5 import subprocess |
6 import zipfile | 6 import zipfile |
7 import optparse | 7 |
8 | 8 |
9 def main(): | 9 def main(): |
10 | 10 |
11 parser = optparse.OptionParser() | 11 extra_files_directory = sys.argv[1] |
12 parser.add_option('-s', action="store", dest='summary') | 12 report = sys.argv[4] |
13 parser.add_option('-p', action="store", dest='new_file_path') | 13 new_files_directory = sys.argv[6] |
14 parser.add_option('-c', action="store", dest='inputcdffull_name') | 14 dataset=sys.argv[7] |
15 parser.add_option('-f', action="store", dest='inputufl_name') | 15 cdffull_name=sys.argv[9] |
16 parser.add_option('-g', action="store", dest='inputugp_name') | 16 ufl_name=sys.argv[10] |
17 parser.add_option('-a', action="store", dest='inputacs_name') | 17 ugp_name=sys.argv[11] |
18 parser.add_option('-d', action="store", dest='inputcdffull') | 18 acs_name=sys.argv[12] |
19 parser.add_option('-v', action="store", dest='inputufl') | 19 cdffull=sys.argv[14] |
20 parser.add_option('-h', action="store", dest='inputugp') | 20 ufl=sys.argv[15] |
21 parser.add_option('-b', action="store", dest='inputacs') | 21 ugp=sys.argv[16] |
22 parser.add_option('-t', action="store", dest='tumorcsv') | 22 acs=sys.argv[17] |
23 parser.add_option('-y', action="store", dest='settingsType') | 23 tumor=sys.argv[18] |
24 parser.add_option('-o', action="store", dest='outputgraph') | 24 settingType=sys.argv[19] |
25 parser.add_option('-z', action="store", dest='zipfigures') | 25 outputgraph=sys.argv[20] |
26 parser.add_option('-k', action="store", dest='outputlog') | 26 zipfigures=sys.argv[21] |
27 parser.add_option('-l', action="store", dest='log') | 27 outputlog=sys.argv[22] |
28 parser.add_option('-u', action="store", dest='user_id') | 28 log=sys.argv[23] |
29 | 29 user=sys.argv[24] |
30 parser.add_option('-i', action="append", dest='inputFile', default=[]) | 30 |
31 parser.add_option('-n', action='append', dest='inputFileName', default=[]) | 31 extra_file_names = sorted(os.listdir(extra_files_directory)) |
32 | 32 |
33 options, args = parser.parse_args() | 33 if (cdffull_name.count(",") != 0): |
34 outputFileName=options.outputFile | 34 chipType=cdffull_name.split(",",1)[0] |
35 | 35 tagExt=cdffull_name.split(",",1)[1] |
36 print options.inputFile | 36 tag=tagExt.split(".",1)[0] |
37 print options.inputFileName | 37 else: |
38 | 38 chipType=cdffull_name.split(".",1)[0] |
39 dataSetName="dataset" | 39 tag="" |
40 destinationPath=os.path.join(options.new_file_path, user, dataset) | 40 |
41 | 41 data_dir = os.path.join(new_files_directory, user, dataset) |
42 mpagenomics_dir = os.path.join(destinationPath,"mpagenomics",user) | 42 mpagenomics_dir = os.path.join(new_files_directory, "mpagenomics",user) |
43 data_dir = os.path.join(options.new_file_path, user) | 43 |
44 | |
45 try: | 44 try: |
46 os.makedirs(data_dir) | 45 os.makedirs(data_dir) |
47 except: | 46 except: |
48 shutil.rmtree(data_dir) | 47 shutil.rmtree(data_dir) |
49 os.makedirs(data_dir) | 48 os.makedirs(data_dir) |
50 | 49 |
51 if (not os.path.isdir(mpagenomics_dir)): | 50 if (not os.path.isdir(mpagenomics_dir)): |
52 os.makedirs(mpagenomics_dir) | 51 os.makedirs(mpagenomics_dir) |
52 | |
53 for name in extra_file_names: | |
54 source = os.path.join(extra_files_directory, name) | |
55 # Strip _task_XXX from end of name | |
56 name_match = re.match(r"^\d+_task_(.*).dat$", name) | |
57 if name_match: | |
58 name = name_match.group(1) | |
59 else: | |
60 # Skip indices, composite extra_files_paths, etc... | |
61 continue | |
62 #escaped_name = name.replace("_", "-") | |
63 #dataset_name = "%s" % (name, 'visible', ext, db_key) | |
64 destination = os.path.join(data_dir, name) | |
65 _copy(source, destination) | |
66 # datasets_created.append(name) | |
53 | 67 |
54 for inputFile, inputFileName in zip(options.inputFile,options.inputFileName): | |
55 source = inputFile | |
56 destination=os.path.join(data_dir,inputFileName) | |
57 os.symlink(source,destination) | |
58 | |
59 if (cdffull_name.count(",") != 0): | |
60 chipType=cdffull_name.split(",",1)[0] | |
61 tagExt=cdffull_name.split(",",1)[1] | |
62 tag=tagExt.split(".",1)[0] | |
63 else: | |
64 chipType=cdffull_name.split(".",1)[0] | |
65 tag="" | |
66 | |
67 _copy(cdffull,os.path.join(data_dir, cdffull_name)) | 68 _copy(cdffull,os.path.join(data_dir, cdffull_name)) |
68 _copy(ugp,os.path.join(data_dir, ugp_name)) | 69 _copy(ugp,os.path.join(data_dir, ugp_name)) |
69 _copy(ufl,os.path.join(data_dir, ufl_name)) | 70 _copy(ufl,os.path.join(data_dir, ufl_name)) |
70 _copy(acs,os.path.join(data_dir, acs_name)) | 71 _copy(acs,os.path.join(data_dir, acs_name)) |
71 | |
72 | 72 |
73 | |
73 fig_dir = os.path.join("mpagenomics", user, "figures", dataset, "signal") | 74 fig_dir = os.path.join("mpagenomics", user, "figures", dataset, "signal") |
74 abs_fig_dir = os.path.join(new_files_directory, fig_dir) | 75 abs_fig_dir = os.path.join(new_files_directory, fig_dir) |
76 | |
77 | |
78 retcode = _preprocess(chipType, dataset, mpagenomics_dir, data_dir, new_files_directory, tumor, settingType, outputgraph, outputlog, log, tag) | |
75 | 79 |
76 | |
77 retcode = _preprocess(chipType, dataSetName, mpagenomics_dir, data_dir, options.new_file_path, options.tumorcsv, options.settingType, options.outputgraph, options.outputlog, options.log, tag) | |
78 | |
79 if (retcode == 0): | 80 if (retcode == 0): |
80 if (os.path.isdir(abs_fig_dir)) and (outputgraph == "TRUE"): | 81 if (os.path.isdir(abs_fig_dir)) and (outputgraph == "TRUE"): |
81 | 82 |
82 new_files = os.listdir(abs_fig_dir) | 83 new_files = os.listdir(abs_fig_dir) |
83 zipbuf = zipfile.ZipFile(os.path.join(abs_fig_dir, zipfigures), 'w', zipfile.ZIP_DEFLATED) | 84 zipbuf = zipfile.ZipFile(os.path.join(abs_fig_dir, zipfigures), 'w', zipfile.ZIP_DEFLATED) |
84 for current_file in new_files: | 85 for current_file in new_files: |
85 fn = os.path.join(abs_fig_dir, current_file) | 86 fn = os.path.join(abs_fig_dir, current_file) |
86 relfn = fn[len(abs_fig_dir) + len(os.sep):] | 87 relfn = fn[len(abs_fig_dir) + len(os.sep):] |
87 zipbuf.write(fn, relfn) | 88 zipbuf.write(fn, relfn) |
88 | 89 |
89 f = open(report, "w") | 90 f = open(report, "w") |
90 # Create report | 91 # Create report |
91 try: | 92 try: |
92 for name in extra_file_names: | 93 for name in extra_file_names: |
93 f.write("%s\t%s\t%s\n" %(re.match(r"^\d+_task_(.*).dat$", name).group(1),dataset,chipType)) | 94 f.write("%s\t%s\t%s\n" %(re.match(r"^\d+_task_(.*).dat$", name).group(1),dataset,chipType)) |
94 finally: | 95 finally: |
95 shutil.rmtree(data_dir) | 96 shutil.rmtree(data_dir) |
96 f.close() | 97 f.close() |
97 | 98 |
98 sys.exit(retcode) | 99 sys.exit(retcode) |
99 | 100 |
100 sys.exit(retcode) | 101 sys.exit(retcode) |
101 | 102 |
102 | 103 |
103 def _copy(source, destination): | 104 def _copy(source, destination): |
104 try: | 105 try: |
105 os.link(source, destination) | 106 os.link(source, destination) |
106 except: | 107 except: |