Mercurial > repos > sblanck > mpagenomics_normalize
comparison preprocess.py @ 0:a89bae08bf2d
Uploaded
author | sblanck |
---|---|
date | Mon, 27 Apr 2015 05:48:52 -0400 |
parents | |
children | 4d25dec9707e |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a89bae08bf2d |
---|---|
1 import os | |
2 import re | |
3 import shutil | |
4 import sys | |
5 import subprocess | |
6 import zipfile | |
7 import optparse | |
8 | |
def _build_option_parser():
    """Return the optparse parser describing this wrapper's command line."""
    # '-h' is reused below for the UGP annotation file. optparse registers
    # '-h/--help' automatically, so with the default conflict handler the
    # add_option call raises OptionConflictError. "resolve" hands '-h' to the
    # new option and keeps '--help' working.
    parser = optparse.OptionParser(conflict_handler="resolve")
    single_valued = (
        ('-s', 'summary'),
        ('-p', 'new_file_path'),
        ('-c', 'inputcdffull_name'),
        ('-f', 'inputufl_name'),
        ('-g', 'inputugp_name'),
        ('-a', 'inputacs_name'),
        ('-d', 'inputcdffull'),
        ('-v', 'inputufl'),
        ('-h', 'inputugp'),
        ('-b', 'inputacs'),
        ('-t', 'tumorcsv'),
        ('-y', 'settingsType'),
        ('-o', 'outputgraph'),
        ('-z', 'zipfigures'),
        ('-k', 'outputlog'),
        ('-l', 'log'),
        ('-u', 'user_id'),
    )
    for flag, dest in single_valued:
        parser.add_option(flag, action="store", dest=dest)
    # Repeatable options: one '-i path' / '-n name' pair per input file.
    parser.add_option('-i', action="append", dest='inputFile', default=[])
    parser.add_option('-n', action="append", dest='inputFileName', default=[])
    return parser


def _split_cdf_name(cdf_name):
    """Split a CDF file name into ``(chip_type, tag)``.

    ``"Chip,Tag.cdf"`` gives ``("Chip", "Tag")``; a name without a comma
    gives ``(text before the first dot, "")``.
    """
    if "," in cdf_name:
        chip_type, _, tag_and_ext = cdf_name.partition(",")
        return chip_type, tag_and_ext.split(".", 1)[0]
    return cdf_name.split(".", 1)[0], ""


def _zip_directory(src_dir, zip_path):
    """Write every direct entry of *src_dir* into a deflated zip at *zip_path*."""
    # List first: the archive may itself live inside src_dir.
    entries = os.listdir(src_dir)
    archive = zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED)
    try:
        for entry in entries:
            entry_path = os.path.join(src_dir, entry)
            # Never add the archive to itself (stale zip from an earlier run).
            if os.path.abspath(entry_path) == os.path.abspath(zip_path):
                continue
            archive.write(entry_path, entry)
    finally:
        # The original never closed the archive, which can leave the zip
        # without its central directory.
        archive.close()


def _sample_name(file_name):
    """Strip a ``<digits>_task_<name>.dat`` wrapper from *file_name* if present."""
    wrapped = re.match(r"^\d+_task_(.*)\.dat$", file_name)
    return wrapped.group(1) if wrapped else file_name


def main():
    """Stage the inputs, run the R preprocessing script and write the summary.

    Steps, in order:
      1. Parse the command line (see ``_build_option_parser``).
      2. Recreate ``<new_file_path>/<user_id>`` as an empty data directory and
         make sure the mpagenomics working directory exists.
      3. Symlink every ``-i`` input into the data directory under its ``-n``
         name and copy the four annotation files (cdf, ugp, ufl, acs) there.
      4. Call ``_preprocess``.
      5. On return code 0: optionally zip the figure directory, write one
         tab-separated ``name<TAB>dataset<TAB>chipType`` line per input to the
         summary file, then delete the data directory.

    Always terminates through ``sys.exit`` with the return code of
    ``_preprocess`` (or 2, via ``parser.error``, on a bad command line).
    """
    parser = _build_option_parser()
    options, args = parser.parse_args()

    # Every option below is dereferenced unconditionally further down, so a
    # missing one would otherwise surface as an obscure TypeError.
    required = (
        'summary', 'new_file_path', 'user_id',
        'inputcdffull_name', 'inputufl_name', 'inputugp_name', 'inputacs_name',
        'inputcdffull', 'inputufl', 'inputugp', 'inputacs',
        'tumorcsv', 'settingsType', 'outputgraph',
    )
    missing = [dest for dest in required if getattr(options, dest) is None]
    if missing:
        parser.error("missing required option value(s): %s" % ", ".join(missing))
    if len(options.inputFile) != len(options.inputFileName):
        parser.error("every -i input file needs a matching -n file name")

    # print(x) with a single argument behaves the same on Python 2 and 3.
    print(options.inputFile)
    print(options.inputFileName)

    user = options.user_id
    dataset = "dataset"
    destination_path = os.path.join(options.new_file_path, user, dataset)

    mpagenomics_dir = os.path.join(destination_path, "mpagenomics", user)
    data_dir = os.path.join(options.new_file_path, user)

    # Start from an empty data directory: if it already exists, wipe it.
    try:
        os.makedirs(data_dir)
    except OSError:
        shutil.rmtree(data_dir)
        os.makedirs(data_dir)

    if not os.path.isdir(mpagenomics_dir):
        os.makedirs(mpagenomics_dir)

    for source, file_name in zip(options.inputFile, options.inputFileName):
        os.symlink(source, os.path.join(data_dir, file_name))

    chip_type, tag = _split_cdf_name(options.inputcdffull_name)

    annotation_files = (
        (options.inputcdffull, options.inputcdffull_name),
        (options.inputugp, options.inputugp_name),
        (options.inputufl, options.inputufl_name),
        (options.inputacs, options.inputacs_name),
    )
    for source, file_name in annotation_files:
        _copy(source, os.path.join(data_dir, file_name))

    fig_dir = os.path.join("mpagenomics", user, "figures", dataset, "signal")
    abs_fig_dir = os.path.join(options.new_file_path, fig_dir)

    retcode = _preprocess(chip_type, dataset, mpagenomics_dir, data_dir,
                          options.new_file_path, options.tumorcsv,
                          options.settingsType, options.outputgraph,
                          options.outputlog, options.log, tag)

    if retcode == 0:
        if os.path.isdir(abs_fig_dir) and options.outputgraph == "TRUE":
            if options.zipfigures is None:
                parser.error("-z is required when -o is TRUE")
            _zip_directory(abs_fig_dir,
                           os.path.join(abs_fig_dir, options.zipfigures))

        # NOTE(review): the original wrote to an undefined `report` and
        # iterated an undefined `extra_file_names`. '-s' (summary) is the only
        # otherwise-unused output option and the '-n' names are the only list
        # of input names available here -- confirm both against the tool XML.
        try:
            with open(options.summary, "w") as report:
                for file_name in options.inputFileName:
                    report.write("%s\t%s\t%s\n"
                                 % (_sample_name(file_name), dataset, chip_type))
        finally:
            # The staged data is removed even if the report cannot be written.
            shutil.rmtree(data_dir)

    sys.exit(retcode)
101 | |
102 | |
def _copy(source, destination):
    """Make *destination* a copy of *source*, hard-linking when possible.

    A hard link is tried first; if that fails, the file is copied with
    ``shutil.copy`` instead.
    """
    try:
        os.link(source, destination)
    except (OSError, AttributeError):
        # OSError: the link could not be created (e.g. destination exists or
        # is on another filesystem). AttributeError: os.link is missing on
        # this platform. Anything else (KeyboardInterrupt, ...) propagates
        # instead of being swallowed by a bare except.
        shutil.copy(source, destination)
108 | |
def _preprocess(chipType, dataset, mpagenomics_dir, data_dir, tmp_dir, tumor, settingType, outputgraph, outputlog, log, tag):
    """Run ``preprocess.R`` through ``Rscript`` and return its exit status.

    The R script is looked up next to this Python file and receives, in order:
    chipType, dataset, mpagenomics_dir, data_dir, tumor, settingType,
    outputgraph, tag.

    Both stdout and stderr of the R process are redirected to one log file:
    *log* when ``outputlog == "TRUE"``, otherwise ``errfile.log`` inside
    *tmp_dir*.

    Returns the integer return code of ``subprocess.call``.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))

    if outputlog == "TRUE":
        log_path = log
    else:
        log_path = os.path.join(tmp_dir, "errfile.log")

    command = [
        "Rscript", os.path.join(script_dir, "preprocess.R"),
        chipType, dataset, mpagenomics_dir, data_dir,
        tumor, settingType, outputgraph, tag,
    ]

    # The original opened the log and never closed it; the with-block
    # guarantees the handle is flushed and released even if the call raises.
    with open(log_path, 'w') as errfile:
        retcode = subprocess.call(command, stdout=errfile, stderr=errfile)
    return retcode
119 | |
120 | |
# Run the wrapper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()