Mercurial > repos > sblanck > mpagenomics_normalize
view preprocess.py @ 1:4d25dec9707e
correction
author | blanck |
---|---|
date | Tue, 28 Apr 2015 11:23:47 +0200 |
parents | a89bae08bf2d |
children | 54d549210759 |
line wrap: on
line source
"""Stage input files, run ``preprocess.R`` through Rscript and collect results.

The script is driven entirely by positional command-line arguments. The
positions read are:

    1   extra_files_directory  directory holding ``<digits>_task_<name>.dat`` files
    4   report                 path of the tab-separated report to write
    6   new_files_directory    working root for staged data and R outputs
    7   dataset                dataset name (sub-directory name and R argument)
    9   cdffull_name           file name to give the ``cdffull`` file
    10  ufl_name               file name to give the ``ufl`` file
    11  ugp_name               file name to give the ``ugp`` file
    12  acs_name               file name to give the ``acs`` file
    14  cdffull                source path of the cdffull file
    15  ufl                    source path of the ufl file
    16  ugp                    source path of the ugp file
    17  acs                    source path of the acs file
    18  tumor                  forwarded unchanged to preprocess.R
    19  settingType            forwarded unchanged to preprocess.R
    20  outputgraph            "TRUE" to zip the produced figures
    21  zipfigures             path (joined onto the figure dir) of the zip
    22  outputlog              "TRUE" to send R output to ``log``
    23  log                    path of the log file
    24  user                   user name used to build working directories

All other positions are ignored by this script.
"""

import os
import re
import shutil
import sys
import subprocess
import zipfile

# Staged extra files look like "<digits>_task_<name>.dat"; group 1 is <name>.
_TASK_FILE_RE = re.compile(r"^\d+_task_(.*)\.dat$")


def _task_name(file_name):
    """Return the ``<name>`` part of ``<digits>_task_<name>.dat``.

    Returns ``None`` when ``file_name`` does not follow that pattern, so
    callers can skip unrelated directory entries.
    """
    match = _TASK_FILE_RE.match(file_name)
    if match is None:
        return None
    return match.group(1)


def _split_chip_type(cdffull_name):
    """Split a cdf file name into ``(chipType, tag)``.

    ``"<chip>,<tag>.<ext>"`` yields ``("<chip>", "<tag>")``; a name without a
    comma yields everything before the first dot and an empty tag.
    """
    if "," in cdffull_name:
        chip_type, tag_and_ext = cdffull_name.split(",", 1)
        return chip_type, tag_and_ext.split(".", 1)[0]
    return cdffull_name.split(".", 1)[0], ""


def _zip_figures(abs_fig_dir, zipfigures):
    """Archive every entry of ``abs_fig_dir`` into the ``zipfigures`` archive.

    Entries are stored under their bare file name. The directory is listed
    before the archive is created, and the archive is closed on exit so its
    central directory is always written.
    """
    figure_names = os.listdir(abs_fig_dir)
    zip_path = os.path.join(abs_fig_dir, zipfigures)
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as archive:
        for figure_name in figure_names:
            archive.write(os.path.join(abs_fig_dir, figure_name), figure_name)


def _write_report(report, task_names, dataset, chipType):
    """Write one ``name<TAB>dataset<TAB>chipType`` line per staged file."""
    with open(report, "w") as report_file:
        for task_name in task_names:
            report_file.write("%s\t%s\t%s\n" % (task_name, dataset, chipType))


def main():
    """Run the whole staging / preprocessing / reporting pipeline.

    Reads its inputs from ``sys.argv`` (see the module docstring) and always
    terminates through ``sys.exit`` with the return code of the R script.
    """
    extra_files_directory = sys.argv[1]
    report = sys.argv[4]
    new_files_directory = sys.argv[6]
    dataset = sys.argv[7]
    cdffull_name = sys.argv[9]
    ufl_name = sys.argv[10]
    ugp_name = sys.argv[11]
    acs_name = sys.argv[12]
    cdffull = sys.argv[14]
    ufl = sys.argv[15]
    ugp = sys.argv[16]
    acs = sys.argv[17]
    tumor = sys.argv[18]
    settingType = sys.argv[19]
    outputgraph = sys.argv[20]
    zipfigures = sys.argv[21]
    outputlog = sys.argv[22]
    log = sys.argv[23]
    user = sys.argv[24]

    # Keep only entries that follow the task-file naming scheme; indices and
    # other composite files found in the directory are ignored everywhere.
    task_files = []
    for file_name in sorted(os.listdir(extra_files_directory)):
        stripped_name = _task_name(file_name)
        if stripped_name is not None:
            task_files.append((file_name, stripped_name))

    chipType, tag = _split_chip_type(cdffull_name)

    data_dir = os.path.join(new_files_directory, user, dataset)
    mpagenomics_dir = os.path.join(new_files_directory, "mpagenomics", user)

    # Always start from an empty data directory.
    if os.path.isdir(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)

    if not os.path.isdir(mpagenomics_dir):
        os.makedirs(mpagenomics_dir)

    # Stage the task files under their stripped names, then the annotation
    # files under the names requested on the command line.
    for file_name, stripped_name in task_files:
        _copy(os.path.join(extra_files_directory, file_name),
              os.path.join(data_dir, stripped_name))

    _copy(cdffull, os.path.join(data_dir, cdffull_name))
    _copy(ugp, os.path.join(data_dir, ugp_name))
    _copy(ufl, os.path.join(data_dir, ufl_name))
    _copy(acs, os.path.join(data_dir, acs_name))

    fig_dir = os.path.join("mpagenomics", user, "figures", dataset, "signal")
    abs_fig_dir = os.path.join(new_files_directory, fig_dir)

    retcode = _preprocess(chipType, dataset, mpagenomics_dir, data_dir,
                          new_files_directory, tumor, settingType,
                          outputgraph, outputlog, log, tag)

    if retcode == 0:
        if os.path.isdir(abs_fig_dir) and outputgraph == "TRUE":
            _zip_figures(abs_fig_dir, zipfigures)

        try:
            _write_report(report,
                          [stripped_name for _, stripped_name in task_files],
                          dataset, chipType)
        finally:
            # The staged copies are only needed while the R script runs.
            shutil.rmtree(data_dir)

    sys.exit(retcode)


def _copy(source, destination):
    """Place ``source`` at ``destination``, hard-linking when possible.

    Falls back to a regular copy when the link cannot be created (for
    example across file systems, when the destination already exists, or on
    platforms without ``os.link``).
    """
    try:
        os.link(source, destination)
    except (OSError, AttributeError):
        shutil.copy(source, destination)


def _preprocess(chipType, dataset, mpagenomics_dir, data_dir, tmp_dir, tumor,
                settingType, outputgraph, outputlog, log, tag):
    """Run ``preprocess.R`` (located next to this file) through Rscript.

    Standard output and standard error of the R process are both written to
    ``log`` when ``outputlog`` is ``"TRUE"``, otherwise to ``errfile.log``
    inside ``tmp_dir``.

    Returns the exit status of the Rscript process.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    if outputlog == "TRUE":
        log_path = log
    else:
        log_path = os.path.join(tmp_dir, "errfile.log")

    command = ["Rscript", os.path.join(script_dir, "preprocess.R"),
               chipType, dataset, mpagenomics_dir, data_dir, tumor,
               settingType, outputgraph, tag]
    # The context manager closes (and flushes) the log once R has finished.
    with open(log_path, 'w') as errfile:
        retcode = subprocess.call(command, stdout=errfile, stderr=errfile)
    return retcode


if __name__ == "__main__":
    main()