Mercurial > repos > sblanck > mpagenomics_wrappers
diff mpagenomics_normalize-7dc6ce39fb89/preprocess.py @ 0:84b13b0e2b85
Uploaded
| author | sblanck |
|---|---|
| date | Thu, 07 May 2015 08:22:36 -0400 |
| parents | |
| children |
line wrap: on
line diff
import os
import re
import shutil
import sys
import subprocess
import zipfile
import optparse


def main():
    """Stage the input files, run ``preprocess.R`` and publish its outputs.

    Steps, in order:

    1. Parse the command-line options (see ``_build_parser``).
    2. (Re)create the staging directory ``<new_file_path>/<user_id>`` and
       make sure ``<new_file_path>/mpagenomics/<user_id>`` exists.
    3. Link or copy every ``-i`` input file, then the four annotation files
       (cdffull, ugp, ufl, acs), into the staging directory under the names
       given on the command line.
    4. Run the R script through ``_preprocess``.
    5. When the R script returns 0: optionally zip the figures directory,
       write the tab-separated summary file and delete the staging
       directory.

    The function ends with ``sys.exit`` using the status returned by
    ``_preprocess``. When that status is non-zero the staging directory is
    left in place.
    """
    options, _ = _build_parser().parse_args()

    dataSetName = options.dataSetName
    mpagenomics_dir = os.path.join(
        options.new_file_path, "mpagenomics", options.user_id)
    data_dir = os.path.join(options.new_file_path, options.user_id)

    # Always start from a fresh staging directory: if it cannot be created
    # (typically because a previous run left it behind), wipe and recreate.
    try:
        os.makedirs(data_dir)
    except OSError:
        shutil.rmtree(data_dir)
        os.makedirs(data_dir)

    if not os.path.isdir(mpagenomics_dir):
        os.makedirs(mpagenomics_dir)

    # The -i / -n options are paired positionally: n-th path, n-th name.
    for source, inputFileName in zip(options.inputFile, options.inputFileName):
        _copy(source, os.path.join(data_dir, inputFileName))

    chipType, tag = _split_chip_type(options.inputcdffull_name)

    _copy(options.inputcdffull, os.path.join(data_dir, options.inputcdffull_name))
    _copy(options.inputugp, os.path.join(data_dir, options.inputugp_name))
    _copy(options.inputufl, os.path.join(data_dir, options.inputufl_name))
    _copy(options.inputacs, os.path.join(data_dir, options.inputacs_name))

    # NOTE(review): presumably preprocess.R writes its figures here; this
    # script only reads the directory after the R run -- confirm in the R code.
    abs_fig_dir = os.path.join(
        options.new_file_path,
        "mpagenomics", options.user_id, "figures", dataSetName, "signal")

    retcode = _preprocess(
        chipType, dataSetName, mpagenomics_dir, data_dir,
        options.new_file_path, options.tumorcsv, options.settingsType,
        options.outputgraph, options.outputlog, options.log, tag)

    if retcode == 0:
        try:
            if os.path.isdir(abs_fig_dir) and options.outputgraph == "TRUE":
                _zip_figures(abs_fig_dir, options.zipfigures)
            _write_summary(
                options.summary, options.inputFileName, dataSetName, chipType)
        finally:
            # The staging directory is removed once a successful run has
            # been reported, even if writing the outputs fails.
            shutil.rmtree(data_dir)

    sys.exit(retcode)


def _build_parser():
    """Return the ``optparse`` parser describing the wrapper's options.

    Every option stores a string under the ``dest`` shown below, except
    ``-i`` and ``-n`` which may be repeated and accumulate into lists.
    """
    parser = optparse.OptionParser()
    single_value_options = (
        ('-s', 'summary'),
        ('-e', 'dataSetName'),
        ('-p', 'new_file_path'),
        ('-c', 'inputcdffull_name'),
        ('-f', 'inputufl_name'),
        ('-g', 'inputugp_name'),
        ('-a', 'inputacs_name'),
        ('-d', 'inputcdffull'),
        ('-v', 'inputufl'),
        ('-w', 'inputugp'),
        ('-b', 'inputacs'),
        ('-t', 'tumorcsv'),
        ('-y', 'settingsType'),
        ('-o', 'outputgraph'),
        ('-z', 'zipfigures'),
        ('-k', 'outputlog'),
        ('-l', 'log'),
        ('-u', 'user_id'),
    )
    for flag, dest in single_value_options:
        parser.add_option(flag, action="store", dest=dest)

    parser.add_option('-i', action="append", dest='inputFile', default=[])
    parser.add_option('-n', action='append', dest='inputFileName', default=[])
    return parser


def _split_chip_type(cdffull_name):
    """Split a cdf file name into ``(chipType, tag)``.

    ``"Chip,tag.cdf"`` gives ``("Chip", "tag")``: the chip type is the text
    before the first comma and the tag is what follows it, up to the first
    dot. Without a comma the chip type is the text before the first dot and
    the tag is the empty string.
    """
    if "," in cdffull_name:
        chipType, tagExt = cdffull_name.split(",", 1)
        return chipType, tagExt.split(".", 1)[0]
    return cdffull_name.split(".", 1)[0], ""


def _zip_figures(abs_fig_dir, zip_name):
    """Archive the entries of ``abs_fig_dir`` into a deflated zip file.

    The archive path is ``os.path.join(abs_fig_dir, zip_name)``, so an
    absolute ``zip_name`` is used as given. The directory is listed before
    the archive is created so that an archive located inside the directory
    never contains itself. Entries are stored under their bare names.
    """
    figure_names = os.listdir(abs_fig_dir)
    archive_path = os.path.join(abs_fig_dir, zip_name)
    # The context manager guarantees the central directory gets written.
    with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED) as zipbuf:
        for figure_name in figure_names:
            zipbuf.write(os.path.join(abs_fig_dir, figure_name), figure_name)


def _write_summary(summary_path, input_file_names, dataSetName, chipType):
    """Write one ``name<TAB>dataset<TAB>chip type`` line per input file."""
    with open(summary_path, "w") as report:
        for inputFileName in input_file_names:
            report.write("%s\t%s\t%s\n" % (inputFileName, dataSetName, chipType))


def _copy(source, destination):
    """Make ``source`` available at ``destination``.

    A symbolic link is tried first; if it cannot be created (destination
    already exists, links unsupported or not permitted on this platform)
    the file is copied with ``shutil.copy`` instead.
    """
    try:
        os.symlink(source, destination)
    except (OSError, AttributeError, NotImplementedError):
        # AttributeError / NotImplementedError: os.symlink is missing or
        # unsupported on some platforms and Python versions.
        shutil.copy(source, destination)


def _preprocess(chipType, dataset, mpagenomics_dir, data_dir, tmp_dir, tumor,
                settingType, outputgraph, outputlog, log, tag):
    """Run ``preprocess.R`` with ``Rscript`` and return its exit status.

    The R script is looked up next to this Python file and receives, in
    order: chipType, dataset, mpagenomics_dir, data_dir, tumor, settingType,
    outputgraph and tag. Its stdout and stderr are both redirected to
    ``log`` when ``outputlog`` is the string ``"TRUE"``, otherwise to
    ``errfile.log`` inside ``tmp_dir``.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))

    if outputlog == "TRUE":
        log_path = log
    else:
        log_path = os.path.join(tmp_dir, "errfile.log")

    command = [
        "Rscript", os.path.join(script_dir, "preprocess.R"),
        chipType, dataset, mpagenomics_dir, data_dir,
        tumor, settingType, outputgraph, tag,
    ]
    # Close the log file as soon as the child process has finished.
    with open(log_path, 'w') as errfile:
        retcode = subprocess.call(command, stdout=errfile, stderr=errfile)
    return retcode


if __name__ == "__main__":
    main()
