comparison preprocess.py @ 1:4d25dec9707e

correction
author blanck
date Tue, 28 Apr 2015 11:23:47 +0200
parents a89bae08bf2d
children 54d549210759
comparison
equal deleted inserted replaced
0:a89bae08bf2d 1:4d25dec9707e
2 import re 2 import re
3 import shutil 3 import shutil
4 import sys 4 import sys
5 import subprocess 5 import subprocess
6 import zipfile 6 import zipfile
7 import optparse 7
8 8
9 def main(): 9 def main():
10 10
11 parser = optparse.OptionParser() 11 extra_files_directory = sys.argv[1]
12 parser.add_option('-s', action="store", dest='summary') 12 report = sys.argv[4]
13 parser.add_option('-p', action="store", dest='new_file_path') 13 new_files_directory = sys.argv[6]
14 parser.add_option('-c', action="store", dest='inputcdffull_name') 14 dataset=sys.argv[7]
15 parser.add_option('-f', action="store", dest='inputufl_name') 15 cdffull_name=sys.argv[9]
16 parser.add_option('-g', action="store", dest='inputugp_name') 16 ufl_name=sys.argv[10]
17 parser.add_option('-a', action="store", dest='inputacs_name') 17 ugp_name=sys.argv[11]
18 parser.add_option('-d', action="store", dest='inputcdffull') 18 acs_name=sys.argv[12]
19 parser.add_option('-v', action="store", dest='inputufl') 19 cdffull=sys.argv[14]
20 parser.add_option('-h', action="store", dest='inputugp') 20 ufl=sys.argv[15]
21 parser.add_option('-b', action="store", dest='inputacs') 21 ugp=sys.argv[16]
22 parser.add_option('-t', action="store", dest='tumorcsv') 22 acs=sys.argv[17]
23 parser.add_option('-y', action="store", dest='settingsType') 23 tumor=sys.argv[18]
24 parser.add_option('-o', action="store", dest='outputgraph') 24 settingType=sys.argv[19]
25 parser.add_option('-z', action="store", dest='zipfigures') 25 outputgraph=sys.argv[20]
26 parser.add_option('-k', action="store", dest='outputlog') 26 zipfigures=sys.argv[21]
27 parser.add_option('-l', action="store", dest='log') 27 outputlog=sys.argv[22]
28 parser.add_option('-u', action="store", dest='user_id') 28 log=sys.argv[23]
29 29 user=sys.argv[24]
30 parser.add_option('-i', action="append", dest='inputFile', default=[]) 30
31 parser.add_option('-n', action='append', dest='inputFileName', default=[]) 31 extra_file_names = sorted(os.listdir(extra_files_directory))
32 32
33 options, args = parser.parse_args() 33 if (cdffull_name.count(",") != 0):
34 outputFileName=options.outputFile 34 chipType=cdffull_name.split(",",1)[0]
35 35 tagExt=cdffull_name.split(",",1)[1]
36 print options.inputFile 36 tag=tagExt.split(".",1)[0]
37 print options.inputFileName 37 else:
38 38 chipType=cdffull_name.split(".",1)[0]
39 dataSetName="dataset" 39 tag=""
40 destinationPath=os.path.join(options.new_file_path, user, dataset) 40
41 41 data_dir = os.path.join(new_files_directory, user, dataset)
42 mpagenomics_dir = os.path.join(destinationPath,"mpagenomics",user) 42 mpagenomics_dir = os.path.join(new_files_directory, "mpagenomics",user)
43 data_dir = os.path.join(options.new_file_path, user) 43
44
45 try: 44 try:
46 os.makedirs(data_dir) 45 os.makedirs(data_dir)
47 except: 46 except:
48 shutil.rmtree(data_dir) 47 shutil.rmtree(data_dir)
49 os.makedirs(data_dir) 48 os.makedirs(data_dir)
50 49
51 if (not os.path.isdir(mpagenomics_dir)): 50 if (not os.path.isdir(mpagenomics_dir)):
52 os.makedirs(mpagenomics_dir) 51 os.makedirs(mpagenomics_dir)
52
53 for name in extra_file_names:
54 source = os.path.join(extra_files_directory, name)
55 # Strip _task_XXX from end of name
56 name_match = re.match(r"^\d+_task_(.*).dat$", name)
57 if name_match:
58 name = name_match.group(1)
59 else:
60 # Skip indices, composite extra_files_paths, etc...
61 continue
62 #escaped_name = name.replace("_", "-")
63 #dataset_name = "%s" % (name, 'visible', ext, db_key)
64 destination = os.path.join(data_dir, name)
65 _copy(source, destination)
66 # datasets_created.append(name)
53 67
54 for inputFile, inputFileName in zip(options.inputFile,options.inputFileName):
55 source = inputFile
56 destination=os.path.join(data_dir,inputFileName)
57 os.symlink(source,destination)
58
59 if (cdffull_name.count(",") != 0):
60 chipType=cdffull_name.split(",",1)[0]
61 tagExt=cdffull_name.split(",",1)[1]
62 tag=tagExt.split(".",1)[0]
63 else:
64 chipType=cdffull_name.split(".",1)[0]
65 tag=""
66
67 _copy(cdffull,os.path.join(data_dir, cdffull_name)) 68 _copy(cdffull,os.path.join(data_dir, cdffull_name))
68 _copy(ugp,os.path.join(data_dir, ugp_name)) 69 _copy(ugp,os.path.join(data_dir, ugp_name))
69 _copy(ufl,os.path.join(data_dir, ufl_name)) 70 _copy(ufl,os.path.join(data_dir, ufl_name))
70 _copy(acs,os.path.join(data_dir, acs_name)) 71 _copy(acs,os.path.join(data_dir, acs_name))
71
72 72
73
73 fig_dir = os.path.join("mpagenomics", user, "figures", dataset, "signal") 74 fig_dir = os.path.join("mpagenomics", user, "figures", dataset, "signal")
74 abs_fig_dir = os.path.join(new_files_directory, fig_dir) 75 abs_fig_dir = os.path.join(new_files_directory, fig_dir)
76
77
78 retcode = _preprocess(chipType, dataset, mpagenomics_dir, data_dir, new_files_directory, tumor, settingType, outputgraph, outputlog, log, tag)
75 79
76
77 retcode = _preprocess(chipType, dataSetName, mpagenomics_dir, data_dir, options.new_file_path, options.tumorcsv, options.settingType, options.outputgraph, options.outputlog, options.log, tag)
78
79 if (retcode == 0): 80 if (retcode == 0):
80 if (os.path.isdir(abs_fig_dir)) and (outputgraph == "TRUE"): 81 if (os.path.isdir(abs_fig_dir)) and (outputgraph == "TRUE"):
81 82
82 new_files = os.listdir(abs_fig_dir) 83 new_files = os.listdir(abs_fig_dir)
83 zipbuf = zipfile.ZipFile(os.path.join(abs_fig_dir, zipfigures), 'w', zipfile.ZIP_DEFLATED) 84 zipbuf = zipfile.ZipFile(os.path.join(abs_fig_dir, zipfigures), 'w', zipfile.ZIP_DEFLATED)
84 for current_file in new_files: 85 for current_file in new_files:
85 fn = os.path.join(abs_fig_dir, current_file) 86 fn = os.path.join(abs_fig_dir, current_file)
86 relfn = fn[len(abs_fig_dir) + len(os.sep):] 87 relfn = fn[len(abs_fig_dir) + len(os.sep):]
87 zipbuf.write(fn, relfn) 88 zipbuf.write(fn, relfn)
88 89
89 f = open(report, "w") 90 f = open(report, "w")
90 # Create report 91 # Create report
91 try: 92 try:
92 for name in extra_file_names: 93 for name in extra_file_names:
93 f.write("%s\t%s\t%s\n" %(re.match(r"^\d+_task_(.*).dat$", name).group(1),dataset,chipType)) 94 f.write("%s\t%s\t%s\n" %(re.match(r"^\d+_task_(.*).dat$", name).group(1),dataset,chipType))
94 finally: 95 finally:
95 shutil.rmtree(data_dir) 96 shutil.rmtree(data_dir)
96 f.close() 97 f.close()
97 98
98 sys.exit(retcode) 99 sys.exit(retcode)
99 100
100 sys.exit(retcode) 101 sys.exit(retcode)
101 102
102 103
103 def _copy(source, destination): 104 def _copy(source, destination):
104 try: 105 try:
105 os.link(source, destination) 106 os.link(source, destination)
106 except: 107 except: