comparison preprocess.py @ 2:54d549210759

correction
author blanck
date Tue, 28 Apr 2015 11:26:30 +0200
parents 4d25dec9707e
children
comparison
equal deleted inserted replaced
1:4d25dec9707e 2:54d549210759
2 import re 2 import re
3 import shutil 3 import shutil
4 import sys 4 import sys
5 import subprocess 5 import subprocess
6 import zipfile 6 import zipfile
7 7 import optparse
8 8
9 def main(): 9 def main():
10 10
11 extra_files_directory = sys.argv[1] 11 parser = optparse.OptionParser()
12 report = sys.argv[4] 12 parser.add_option('-s', action="store", dest='summary')
13 new_files_directory = sys.argv[6] 13 parser.add_option('-e', action="store", dest='dataSetName')
14 dataset=sys.argv[7] 14 parser.add_option('-p', action="store", dest='new_file_path')
15 cdffull_name=sys.argv[9] 15 parser.add_option('-c', action="store", dest='inputcdffull_name')
16 ufl_name=sys.argv[10] 16 parser.add_option('-f', action="store", dest='inputufl_name')
17 ugp_name=sys.argv[11] 17 parser.add_option('-g', action="store", dest='inputugp_name')
18 acs_name=sys.argv[12] 18 parser.add_option('-a', action="store", dest='inputacs_name')
19 cdffull=sys.argv[14] 19 parser.add_option('-d', action="store", dest='inputcdffull')
20 ufl=sys.argv[15] 20 parser.add_option('-v', action="store", dest='inputufl')
21 ugp=sys.argv[16] 21 parser.add_option('-w', action="store", dest='inputugp')
22 acs=sys.argv[17] 22 parser.add_option('-b', action="store", dest='inputacs')
23 tumor=sys.argv[18] 23 parser.add_option('-t', action="store", dest='tumorcsv')
24 settingType=sys.argv[19] 24 parser.add_option('-y', action="store", dest='settingsType')
25 outputgraph=sys.argv[20] 25 parser.add_option('-o', action="store", dest='outputgraph')
26 zipfigures=sys.argv[21] 26 parser.add_option('-z', action="store", dest='zipfigures')
27 outputlog=sys.argv[22] 27 parser.add_option('-k', action="store", dest='outputlog')
28 log=sys.argv[23] 28 parser.add_option('-l', action="store", dest='log')
29 user=sys.argv[24] 29 parser.add_option('-u', action="store", dest='user_id')
30
31 parser.add_option('-i', action="append", dest='inputFile', default=[])
32 parser.add_option('-n', action='append', dest='inputFileName', default=[])
33
34 options, args = parser.parse_args()
35
36 dataSetName=options.dataSetName
37 destinationPath=os.path.join(options.new_file_path, options.user_id, dataSetName)
30 38
31 extra_file_names = sorted(os.listdir(extra_files_directory)) 39 mpagenomics_dir = os.path.join(options.new_file_path,"mpagenomics",options.user_id)
32 40 data_dir = os.path.join(options.new_file_path, options.user_id)
33 if (cdffull_name.count(",") != 0): 41
34 chipType=cdffull_name.split(",",1)[0]
35 tagExt=cdffull_name.split(",",1)[1]
36 tag=tagExt.split(".",1)[0]
37 else:
38 chipType=cdffull_name.split(".",1)[0]
39 tag=""
40
41 data_dir = os.path.join(new_files_directory, user, dataset)
42 mpagenomics_dir = os.path.join(new_files_directory, "mpagenomics",user)
43
44 try: 42 try:
45 os.makedirs(data_dir) 43 os.makedirs(data_dir)
46 except: 44 except:
47 shutil.rmtree(data_dir) 45 shutil.rmtree(data_dir)
48 os.makedirs(data_dir) 46 os.makedirs(data_dir)
49 47
50 if (not os.path.isdir(mpagenomics_dir)): 48 if (not os.path.isdir(mpagenomics_dir)):
51 os.makedirs(mpagenomics_dir) 49 os.makedirs(mpagenomics_dir)
52
53 for name in extra_file_names:
54 source = os.path.join(extra_files_directory, name)
55 # Strip _task_XXX from end of name
56 name_match = re.match(r"^\d+_task_(.*).dat$", name)
57 if name_match:
58 name = name_match.group(1)
59 else:
60 # Skip indices, composite extra_files_paths, etc...
61 continue
62 #escaped_name = name.replace("_", "-")
63 #dataset_name = "%s" % (name, 'visible', ext, db_key)
64 destination = os.path.join(data_dir, name)
65 _copy(source, destination)
66 # datasets_created.append(name)
67 50
68 _copy(cdffull,os.path.join(data_dir, cdffull_name)) 51 for inputFile, inputFileName in zip(options.inputFile,options.inputFileName):
69 _copy(ugp,os.path.join(data_dir, ugp_name)) 52 source = inputFile
70 _copy(ufl,os.path.join(data_dir, ufl_name)) 53 destination=os.path.join(data_dir,inputFileName)
71 _copy(acs,os.path.join(data_dir, acs_name)) 54 _copy(source,destination)
72 55
73
74 fig_dir = os.path.join("mpagenomics", user, "figures", dataset, "signal")
75 abs_fig_dir = os.path.join(new_files_directory, fig_dir)
76
77 56
78 retcode = _preprocess(chipType, dataset, mpagenomics_dir, data_dir, new_files_directory, tumor, settingType, outputgraph, outputlog, log, tag) 57 cdffull_name=options.inputcdffull_name
58 if (cdffull_name.count(",") != 0):
59 chipType=cdffull_name.split(",",1)[0]
60 tagExt=cdffull_name.split(",",1)[1]
61 tag=tagExt.split(".",1)[0]
62 else:
63 chipType=cdffull_name.split(".",1)[0]
64 tag=""
65
66 _copy(options.inputcdffull,os.path.join(data_dir, options.inputcdffull_name))
67 _copy(options.inputugp,os.path.join(data_dir, options.inputugp_name))
68 _copy(options.inputufl,os.path.join(data_dir, options.inputufl_name))
69 _copy(options.inputacs,os.path.join(data_dir, options.inputacs_name))
70
71
72 fig_dir = os.path.join("mpagenomics", options.user_id, "figures", dataSetName, "signal")
73 abs_fig_dir = os.path.join(options.new_file_path, fig_dir)
79 74
75
76 retcode = _preprocess(chipType, dataSetName, mpagenomics_dir, data_dir, options.new_file_path, options.tumorcsv, options.settingsType, options.outputgraph, options.outputlog, options.log, tag)
77
80 if (retcode == 0): 78 if (retcode == 0):
81 if (os.path.isdir(abs_fig_dir)) and (outputgraph == "TRUE"): 79 if (os.path.isdir(abs_fig_dir)) and (options.outputgraph == "TRUE"):
82 80
83 new_files = os.listdir(abs_fig_dir) 81 new_files = os.listdir(abs_fig_dir)
84 zipbuf = zipfile.ZipFile(os.path.join(abs_fig_dir, zipfigures), 'w', zipfile.ZIP_DEFLATED) 82 zipbuf = zipfile.ZipFile(os.path.join(abs_fig_dir, options.zipfigures), 'w', zipfile.ZIP_DEFLATED)
85 for current_file in new_files: 83 for current_file in new_files:
86 fn = os.path.join(abs_fig_dir, current_file) 84 fn = os.path.join(abs_fig_dir, current_file)
87 relfn = fn[len(abs_fig_dir) + len(os.sep):] 85 relfn = fn[len(abs_fig_dir) + len(os.sep):]
88 zipbuf.write(fn, relfn) 86 zipbuf.write(fn, relfn)
89 87
90 f = open(report, "w") 88 f = open(options.summary, "w")
91 # Create report 89 # Create report
92 try: 90 try:
93 for name in extra_file_names: 91 for inputFileName in options.inputFileName:
94 f.write("%s\t%s\t%s\n" %(re.match(r"^\d+_task_(.*).dat$", name).group(1),dataset,chipType)) 92 f.write("%s\t%s\t%s\n" %(inputFileName,dataSetName,chipType))
95 finally: 93 finally:
96 shutil.rmtree(data_dir) 94 shutil.rmtree(data_dir)
97 f.close() 95 f.close()
98 96
99 sys.exit(retcode) 97 sys.exit(retcode)
100 98
101 sys.exit(retcode) 99 sys.exit(retcode)
102 100
103 101
104 def _copy(source, destination): 102 def _copy(source, destination):
105 try: 103 try:
106 os.link(source, destination) 104 os.symlink(source, destination)
107 except: 105 except:
108 shutil.copy(source, destination) 106 shutil.copy(source, destination)
109 107
110 def _preprocess (chipType,dataset,mpagenomics_dir,data_dir,tmp_dir,tumor,settingType,outputgraph,outputlog,log,tag): 108 def _preprocess (chipType,dataset,mpagenomics_dir,data_dir,tmp_dir,tumor,settingType,outputgraph,outputlog,log,tag):
111 script_dir=os.path.dirname(os.path.abspath(__file__)) 109 script_dir=os.path.dirname(os.path.abspath(__file__))