0
|
1 import os
|
|
2 import re
|
|
3 import shutil
|
|
4 import sys
|
|
5 import subprocess
|
|
6 import zipfile
|
1
|
7
|
0
|
8
|
|
def main():
    """Stage the input files, run the R preprocessing and write the report.

    Every input is read positionally from ``sys.argv`` (indices 1, 4, 6, 7,
    9-12 and 14-24 are used; the others are ignored here).

    Steps:
      1. Derive ``chipType`` and ``tag`` from the cdf file name: with a comma
         in the name, ``chipType`` is the part before the first comma and
         ``tag`` is what follows it up to the first dot; otherwise
         ``chipType`` is the part before the first dot and ``tag`` is empty.
      2. (Re)create ``<new_files_directory>/<user>/<dataset>`` and make sure
         ``<new_files_directory>/mpagenomics/<user>`` exists.
      3. Link or copy every extra file named ``<digits>_task_<name>.dat`` into
         the data directory as ``<name>``, plus the cdffull/ugp/ufl/acs files
         under the names given on the command line.
      4. Run ``_preprocess``.  When it returns 0: optionally zip the figures
         directory, write one tab-separated ``name, dataset, chipType`` line
         per staged extra file to the report, and remove the data directory.

    The process always terminates through ``sys.exit`` with the return code
    of ``_preprocess``.
    """
    extra_files_directory = sys.argv[1]
    report = sys.argv[4]
    new_files_directory = sys.argv[6]
    dataset = sys.argv[7]
    cdffull_name = sys.argv[9]
    ufl_name = sys.argv[10]
    ugp_name = sys.argv[11]
    acs_name = sys.argv[12]
    cdffull = sys.argv[14]
    ufl = sys.argv[15]
    ugp = sys.argv[16]
    acs = sys.argv[17]
    tumor = sys.argv[18]
    settingType = sys.argv[19]
    outputgraph = sys.argv[20]
    zipfigures = sys.argv[21]
    outputlog = sys.argv[22]
    log = sys.argv[23]
    user = sys.argv[24]

    extra_file_names = sorted(os.listdir(extra_files_directory))

    if "," in cdffull_name:
        chipType, tagExt = cdffull_name.split(",", 1)
        tag = tagExt.split(".", 1)[0]
    else:
        chipType = cdffull_name.split(".", 1)[0]
        tag = ""

    data_dir = os.path.join(new_files_directory, user, dataset)
    mpagenomics_dir = os.path.join(new_files_directory, "mpagenomics", user)

    # Start from an empty data directory: if it cannot be created (typically
    # because it is left over from a previous run), wipe it and recreate it.
    try:
        os.makedirs(data_dir)
    except OSError:
        shutil.rmtree(data_dir)
        os.makedirs(data_dir)

    if not os.path.isdir(mpagenomics_dir):
        os.makedirs(mpagenomics_dir)

    # Resolve the task names once so that the staging loop and the report
    # agree on which files are taken into account.  Names that do not match
    # (indices, composite extra_files_paths, etc...) are skipped in both.
    task_name_re = re.compile(r"^\d+_task_(.*).dat$")
    task_names = []
    for name in extra_file_names:
        name_match = task_name_re.match(name)
        if not name_match:
            continue
        # Strip the "<digits>_task_" prefix and the ".dat" suffix.
        task_name = name_match.group(1)
        _copy(os.path.join(extra_files_directory, name),
              os.path.join(data_dir, task_name))
        task_names.append(task_name)

    _copy(cdffull, os.path.join(data_dir, cdffull_name))
    _copy(ugp, os.path.join(data_dir, ugp_name))
    _copy(ufl, os.path.join(data_dir, ufl_name))
    _copy(acs, os.path.join(data_dir, acs_name))

    fig_dir = os.path.join("mpagenomics", user, "figures", dataset, "signal")
    abs_fig_dir = os.path.join(new_files_directory, fig_dir)

    retcode = _preprocess(chipType, dataset, mpagenomics_dir, data_dir,
                          new_files_directory, tumor, settingType,
                          outputgraph, outputlog, log, tag)

    if retcode == 0:
        if os.path.isdir(abs_fig_dir) and outputgraph == "TRUE":
            # List the figures before the archive is created so that an
            # archive placed in the same directory is not added to itself.
            figure_names = os.listdir(abs_fig_dir)
            archive_path = os.path.join(abs_fig_dir, zipfigures)
            with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED) as zipbuf:
                for figure_name in figure_names:
                    # Store each figure under its bare file name.
                    zipbuf.write(os.path.join(abs_fig_dir, figure_name),
                                 figure_name)

        # Create report
        with open(report, "w") as f:
            try:
                for task_name in task_names:
                    f.write("%s\t%s\t%s\n" % (task_name, dataset, chipType))
            finally:
                # The staged copies are no longer needed once the report has
                # been attempted, whether or not writing it succeeded.
                shutil.rmtree(data_dir)

    sys.exit(retcode)
|
1
|
102
|
0
|
103
|
|
104 def _copy(source, destination):
|
|
105 try:
|
|
106 os.link(source, destination)
|
|
107 except:
|
|
108 shutil.copy(source, destination)
|
|
109
|
|
def _preprocess (chipType,dataset,mpagenomics_dir,data_dir,tmp_dir,tumor,settingType,outputgraph,outputlog,log,tag):
    """Run ``preprocess.R`` through ``Rscript`` and return its exit code.

    The R script is looked up in the directory containing this Python file
    and receives, in order: ``chipType``, ``dataset``, ``mpagenomics_dir``,
    ``data_dir``, ``tumor``, ``settingType``, ``outputgraph`` and ``tag``.

    Both stdout and stderr of the child process are written to ``log`` when
    ``outputlog`` is the string ``"TRUE"``, and to ``errfile.log`` inside
    ``tmp_dir`` otherwise.  The log file is closed before returning.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))

    if outputlog == "TRUE":
        log_path = log
    else:
        log_path = os.path.join(tmp_dir, "errfile.log")

    command = ["Rscript", os.path.join(script_dir, "preprocess.R"),
               chipType, dataset, mpagenomics_dir, data_dir,
               tumor, settingType, outputgraph, tag]

    # The context manager guarantees the handle is released even when
    # launching Rscript fails.
    with open(log_path, 'w') as errfile:
        retcode = subprocess.call(command, stdout=errfile, stderr=errfile)
    return retcode
|
|
120
|
|
121
|
|
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|