Mercurial > repos > sblanck > mpagenomics_normalize
changeset 2:54d549210759
correction
author | blanck |
---|---|
date | Tue, 28 Apr 2015 11:26:30 +0200 |
parents | 4d25dec9707e |
children | c6ab9e172cc7 |
files | preprocess.py preprocess.xml |
diffstat | 2 files changed, 68 insertions(+), 70 deletions(-) [+] |
line wrap: on
line diff
--- a/preprocess.py Tue Apr 28 11:23:47 2015 +0200 +++ b/preprocess.py Tue Apr 28 11:26:30 2015 +0200 @@ -4,43 +4,41 @@ import sys import subprocess import zipfile - +import optparse def main(): - extra_files_directory = sys.argv[1] - report = sys.argv[4] - new_files_directory = sys.argv[6] - dataset=sys.argv[7] - cdffull_name=sys.argv[9] - ufl_name=sys.argv[10] - ugp_name=sys.argv[11] - acs_name=sys.argv[12] - cdffull=sys.argv[14] - ufl=sys.argv[15] - ugp=sys.argv[16] - acs=sys.argv[17] - tumor=sys.argv[18] - settingType=sys.argv[19] - outputgraph=sys.argv[20] - zipfigures=sys.argv[21] - outputlog=sys.argv[22] - log=sys.argv[23] - user=sys.argv[24] + parser = optparse.OptionParser() + parser.add_option('-s', action="store", dest='summary') + parser.add_option('-e', action="store", dest='dataSetName') + parser.add_option('-p', action="store", dest='new_file_path') + parser.add_option('-c', action="store", dest='inputcdffull_name') + parser.add_option('-f', action="store", dest='inputufl_name') + parser.add_option('-g', action="store", dest='inputugp_name') + parser.add_option('-a', action="store", dest='inputacs_name') + parser.add_option('-d', action="store", dest='inputcdffull') + parser.add_option('-v', action="store", dest='inputufl') + parser.add_option('-w', action="store", dest='inputugp') + parser.add_option('-b', action="store", dest='inputacs') + parser.add_option('-t', action="store", dest='tumorcsv') + parser.add_option('-y', action="store", dest='settingsType') + parser.add_option('-o', action="store", dest='outputgraph') + parser.add_option('-z', action="store", dest='zipfigures') + parser.add_option('-k', action="store", dest='outputlog') + parser.add_option('-l', action="store", dest='log') + parser.add_option('-u', action="store", dest='user_id') + + parser.add_option('-i', action="append", dest='inputFile', default=[]) + parser.add_option('-n', action='append', dest='inputFileName', default=[]) + + options, args = parser.parse_args() + + dataSetName=options.dataSetName + destinationPath=os.path.join(options.new_file_path, options.user_id, dataSetName) - extra_file_names = sorted(os.listdir(extra_files_directory)) - - if (cdffull_name.count(",") != 0): - chipType=cdffull_name.split(",",1)[0] - tagExt=cdffull_name.split(",",1)[1] - tag=tagExt.split(".",1)[0] - else: - chipType=cdffull_name.split(".",1)[0] - tag="" - - data_dir = os.path.join(new_files_directory, user, dataset) - mpagenomics_dir = os.path.join(new_files_directory, "mpagenomics",user) - + mpagenomics_dir = os.path.join(options.new_file_path,"mpagenomics",options.user_id) + data_dir = os.path.join(options.new_file_path, options.user_id) + try: os.makedirs(data_dir) except: @@ -49,61 +47,61 @@ if (not os.path.isdir(mpagenomics_dir)): os.makedirs(mpagenomics_dir) - - for name in extra_file_names: - source = os.path.join(extra_files_directory, name) - # Strip _task_XXX from end of name - name_match = re.match(r"^\d+_task_(.*).dat$", name) - if name_match: - name = name_match.group(1) - else: - # Skip indices, composite extra_files_paths, etc... - continue - #escaped_name = name.replace("_", "-") - #dataset_name = "%s" % (name, 'visible', ext, db_key) - destination = os.path.join(data_dir, name) - _copy(source, destination) -# datasets_created.append(name) + + for inputFile, inputFileName in zip(options.inputFile,options.inputFileName): + source = inputFile + destination=os.path.join(data_dir,inputFileName) + _copy(source,destination) + - _copy(cdffull,os.path.join(data_dir, cdffull_name)) - _copy(ugp,os.path.join(data_dir, ugp_name)) - _copy(ufl,os.path.join(data_dir, ufl_name)) - _copy(acs,os.path.join(data_dir, acs_name)) + cdffull_name=options.inputcdffull_name + if (cdffull_name.count(",") != 0): + chipType=cdffull_name.split(",",1)[0] + tagExt=cdffull_name.split(",",1)[1] + tag=tagExt.split(".",1)[0] + else: + chipType=cdffull_name.split(".",1)[0] + tag="" + + _copy(options.inputcdffull,os.path.join(data_dir, options.inputcdffull_name)) + _copy(options.inputugp,os.path.join(data_dir, options.inputugp_name)) + _copy(options.inputufl,os.path.join(data_dir, options.inputufl_name)) + _copy(options.inputacs,os.path.join(data_dir, options.inputacs_name)) + - - fig_dir = os.path.join("mpagenomics", user, "figures", dataset, "signal") - abs_fig_dir = os.path.join(new_files_directory, fig_dir) - - - retcode = _preprocess(chipType, dataset, mpagenomics_dir, data_dir, new_files_directory, tumor, settingType, outputgraph, outputlog, log, tag) + fig_dir = os.path.join("mpagenomics", options.user_id, "figures", dataSetName, "signal") + abs_fig_dir = os.path.join(options.new_file_path, fig_dir) + + retcode = _preprocess(chipType, dataSetName, mpagenomics_dir, data_dir, options.new_file_path, options.tumorcsv, options.settingsType, options.outputgraph, options.outputlog, options.log, tag) + if (retcode == 0): - if (os.path.isdir(abs_fig_dir)) and (outputgraph == "TRUE"): - + if (os.path.isdir(abs_fig_dir)) and (options.outputgraph == "TRUE"): + new_files = os.listdir(abs_fig_dir) - zipbuf = zipfile.ZipFile(os.path.join(abs_fig_dir, zipfigures), 'w', zipfile.ZIP_DEFLATED) + zipbuf = zipfile.ZipFile(os.path.join(abs_fig_dir, options.zipfigures), 'w', zipfile.ZIP_DEFLATED) for current_file in new_files: fn = os.path.join(abs_fig_dir, current_file) relfn = fn[len(abs_fig_dir) + len(os.sep):] zipbuf.write(fn, relfn) - - f = open(report, "w") + + f = open(options.summary, "w") # Create report try: - for name in extra_file_names: - f.write("%s\t%s\t%s\n" %(re.match(r"^\d+_task_(.*).dat$", name).group(1),dataset,chipType)) + for inputFileName in options.inputFileName: + f.write("%s\t%s\t%s\n" %(inputFileName,dataSetName,chipType)) finally: shutil.rmtree(data_dir) f.close() - + sys.exit(retcode) - + sys.exit(retcode) - + def _copy(source, destination): try: - os.link(source, destination) + os.symlink(source, destination) except: shutil.copy(source, destination)
--- a/preprocess.xml Tue Apr 28 11:23:47 2015 +0200 +++ b/preprocess.xml Tue Apr 28 11:26:30 2015 +0200 @@ -1,7 +1,7 @@ <tool id="preprocess2" name="Data Normalization" force_history_refresh="True" version="0.1.0"> <command interpreter="python"> - preprocess2.py + preprocess.py -s '$summary' -p '$__new_file_path__' -c '$inputcdffull.name'