Mercurial > repos > sblanck > mpagenomics_normalize
comparison preprocess.py @ 2:54d549210759
correction
author | blanck |
---|---|
date | Tue, 28 Apr 2015 11:26:30 +0200 |
parents | 4d25dec9707e |
children |
comparison
equal
deleted
inserted
replaced
1:4d25dec9707e | 2:54d549210759 |
---|---|
2 import re | 2 import re |
3 import shutil | 3 import shutil |
4 import sys | 4 import sys |
5 import subprocess | 5 import subprocess |
6 import zipfile | 6 import zipfile |
7 | 7 import optparse |
8 | 8 |
9 def main(): | 9 def main(): |
10 | 10 |
11 extra_files_directory = sys.argv[1] | 11 parser = optparse.OptionParser() |
12 report = sys.argv[4] | 12 parser.add_option('-s', action="store", dest='summary') |
13 new_files_directory = sys.argv[6] | 13 parser.add_option('-e', action="store", dest='dataSetName') |
14 dataset=sys.argv[7] | 14 parser.add_option('-p', action="store", dest='new_file_path') |
15 cdffull_name=sys.argv[9] | 15 parser.add_option('-c', action="store", dest='inputcdffull_name') |
16 ufl_name=sys.argv[10] | 16 parser.add_option('-f', action="store", dest='inputufl_name') |
17 ugp_name=sys.argv[11] | 17 parser.add_option('-g', action="store", dest='inputugp_name') |
18 acs_name=sys.argv[12] | 18 parser.add_option('-a', action="store", dest='inputacs_name') |
19 cdffull=sys.argv[14] | 19 parser.add_option('-d', action="store", dest='inputcdffull') |
20 ufl=sys.argv[15] | 20 parser.add_option('-v', action="store", dest='inputufl') |
21 ugp=sys.argv[16] | 21 parser.add_option('-w', action="store", dest='inputugp') |
22 acs=sys.argv[17] | 22 parser.add_option('-b', action="store", dest='inputacs') |
23 tumor=sys.argv[18] | 23 parser.add_option('-t', action="store", dest='tumorcsv') |
24 settingType=sys.argv[19] | 24 parser.add_option('-y', action="store", dest='settingsType') |
25 outputgraph=sys.argv[20] | 25 parser.add_option('-o', action="store", dest='outputgraph') |
26 zipfigures=sys.argv[21] | 26 parser.add_option('-z', action="store", dest='zipfigures') |
27 outputlog=sys.argv[22] | 27 parser.add_option('-k', action="store", dest='outputlog') |
28 log=sys.argv[23] | 28 parser.add_option('-l', action="store", dest='log') |
29 user=sys.argv[24] | 29 parser.add_option('-u', action="store", dest='user_id') |
| | 30 |
| | 31 parser.add_option('-i', action="append", dest='inputFile', default=[]) |
| | 32 parser.add_option('-n', action='append', dest='inputFileName', default=[]) |
| | 33 |
| | 34 options, args = parser.parse_args() |
| | 35 |
| | 36 dataSetName=options.dataSetName |
| | 37 destinationPath=os.path.join(options.new_file_path, options.user_id, dataSetName) |
30 | 38 |
31 extra_file_names = sorted(os.listdir(extra_files_directory)) | 39 mpagenomics_dir = os.path.join(options.new_file_path,"mpagenomics",options.user_id) |
32 | 40 data_dir = os.path.join(options.new_file_path, options.user_id) |
33 if (cdffull_name.count(",") != 0): | 41 |
34 chipType=cdffull_name.split(",",1)[0] | |
35 tagExt=cdffull_name.split(",",1)[1] | |
36 tag=tagExt.split(".",1)[0] | |
37 else: | |
38 chipType=cdffull_name.split(".",1)[0] | |
39 tag="" | |
40 | |
41 data_dir = os.path.join(new_files_directory, user, dataset) | |
42 mpagenomics_dir = os.path.join(new_files_directory, "mpagenomics",user) | |
43 | |
44 try: | 42 try: |
45 os.makedirs(data_dir) | 43 os.makedirs(data_dir) |
46 except: | 44 except: |
47 shutil.rmtree(data_dir) | 45 shutil.rmtree(data_dir) |
48 os.makedirs(data_dir) | 46 os.makedirs(data_dir) |
49 | 47 |
50 if (not os.path.isdir(mpagenomics_dir)): | 48 if (not os.path.isdir(mpagenomics_dir)): |
51 os.makedirs(mpagenomics_dir) | 49 os.makedirs(mpagenomics_dir) |
52 | |
53 for name in extra_file_names: | |
54 source = os.path.join(extra_files_directory, name) | |
55 # Strip _task_XXX from end of name | |
56 name_match = re.match(r"^\d+_task_(.*).dat$", name) | |
57 if name_match: | |
58 name = name_match.group(1) | |
59 else: | |
60 # Skip indices, composite extra_files_paths, etc... | |
61 continue | |
62 #escaped_name = name.replace("_", "-") | |
63 #dataset_name = "%s" % (name, 'visible', ext, db_key) | |
64 destination = os.path.join(data_dir, name) | |
65 _copy(source, destination) | |
66 # datasets_created.append(name) | |
67 | 50 |
68 _copy(cdffull,os.path.join(data_dir, cdffull_name)) | 51 for inputFile, inputFileName in zip(options.inputFile,options.inputFileName): |
69 _copy(ugp,os.path.join(data_dir, ugp_name)) | 52 source = inputFile |
70 _copy(ufl,os.path.join(data_dir, ufl_name)) | 53 destination=os.path.join(data_dir,inputFileName) |
71 _copy(acs,os.path.join(data_dir, acs_name)) | 54 _copy(source,destination) |
72 | 55 |
73 | |
74 fig_dir = os.path.join("mpagenomics", user, "figures", dataset, "signal") | |
75 abs_fig_dir = os.path.join(new_files_directory, fig_dir) | |
76 | |
77 | 56 |
78 retcode = _preprocess(chipType, dataset, mpagenomics_dir, data_dir, new_files_directory, tumor, settingType, outputgraph, outputlog, log, tag) | 57 cdffull_name=options.inputcdffull_name |
| | 58 if (cdffull_name.count(",") != 0): |
| | 59 chipType=cdffull_name.split(",",1)[0] |
| | 60 tagExt=cdffull_name.split(",",1)[1] |
| | 61 tag=tagExt.split(".",1)[0] |
| | 62 else: |
| | 63 chipType=cdffull_name.split(".",1)[0] |
| | 64 tag="" |
| | 65 |
| | 66 _copy(options.inputcdffull,os.path.join(data_dir, options.inputcdffull_name)) |
| | 67 _copy(options.inputugp,os.path.join(data_dir, options.inputugp_name)) |
| | 68 _copy(options.inputufl,os.path.join(data_dir, options.inputufl_name)) |
| | 69 _copy(options.inputacs,os.path.join(data_dir, options.inputacs_name)) |
| | 70 |
| | 71 |
| | 72 fig_dir = os.path.join("mpagenomics", options.user_id, "figures", dataSetName, "signal") |
| | 73 abs_fig_dir = os.path.join(options.new_file_path, fig_dir) |
79 | 74 |
| | 75 |
| | 76 retcode = _preprocess(chipType, dataSetName, mpagenomics_dir, data_dir, options.new_file_path, options.tumorcsv, options.settingsType, options.outputgraph, options.outputlog, options.log, tag) |
| | 77 |
80 if (retcode == 0): | 78 if (retcode == 0): |
81 if (os.path.isdir(abs_fig_dir)) and (outputgraph == "TRUE"): | 79 if (os.path.isdir(abs_fig_dir)) and (options.outputgraph == "TRUE"): |
82 | 80 |
83 new_files = os.listdir(abs_fig_dir) | 81 new_files = os.listdir(abs_fig_dir) |
84 zipbuf = zipfile.ZipFile(os.path.join(abs_fig_dir, zipfigures), 'w', zipfile.ZIP_DEFLATED) | 82 zipbuf = zipfile.ZipFile(os.path.join(abs_fig_dir, options.zipfigures), 'w', zipfile.ZIP_DEFLATED) |
85 for current_file in new_files: | 83 for current_file in new_files: |
86 fn = os.path.join(abs_fig_dir, current_file) | 84 fn = os.path.join(abs_fig_dir, current_file) |
87 relfn = fn[len(abs_fig_dir) + len(os.sep):] | 85 relfn = fn[len(abs_fig_dir) + len(os.sep):] |
88 zipbuf.write(fn, relfn) | 86 zipbuf.write(fn, relfn) |
89 | 87 |
90 f = open(report, "w") | 88 f = open(options.summary, "w") |
91 # Create report | 89 # Create report |
92 try: | 90 try: |
93 for name in extra_file_names: | 91 for inputFileName in options.inputFileName: |
94 f.write("%s\t%s\t%s\n" %(re.match(r"^\d+_task_(.*).dat$", name).group(1),dataset,chipType)) | 92 f.write("%s\t%s\t%s\n" %(inputFileName,dataSetName,chipType)) |
95 finally: | 93 finally: |
96 shutil.rmtree(data_dir) | 94 shutil.rmtree(data_dir) |
97 f.close() | 95 f.close() |
98 | 96 |
99 sys.exit(retcode) | 97 sys.exit(retcode) |
100 | 98 |
101 sys.exit(retcode) | 99 sys.exit(retcode) |
102 | 100 |
103 | 101 |
104 def _copy(source, destination): | 102 def _copy(source, destination): |
105 try: | 103 try: |
106 os.link(source, destination) | 104 os.symlink(source, destination) |
107 except: | 105 except: |
108 shutil.copy(source, destination) | 106 shutil.copy(source, destination) |
109 | 107 |
110 def _preprocess (chipType,dataset,mpagenomics_dir,data_dir,tmp_dir,tumor,settingType,outputgraph,outputlog,log,tag): | 108 def _preprocess (chipType,dataset,mpagenomics_dir,data_dir,tmp_dir,tumor,settingType,outputgraph,outputlog,log,tag): |
111 script_dir=os.path.dirname(os.path.abspath(__file__)) | 109 script_dir=os.path.dirname(os.path.abspath(__file__)) |