Mercurial > repos > sblanck > mpagenomics_normalize
changeset 1:4d25dec9707e
correction
author | blanck |
---|---|
date | Tue, 28 Apr 2015 11:23:47 +0200 |
parents | a89bae08bf2d |
children | 54d549210759 |
files | preprocess.py preprocess.xml |
diffstat | 2 files changed, 68 insertions(+), 68 deletions(-) [+] |
line wrap: on
line diff
--- a/preprocess.py Mon Apr 27 05:48:52 2015 -0400 +++ b/preprocess.py Tue Apr 28 11:23:47 2015 +0200 @@ -4,44 +4,43 @@ import sys import subprocess import zipfile -import optparse + def main(): - parser = optparse.OptionParser() - parser.add_option('-s', action="store", dest='summary') - parser.add_option('-p', action="store", dest='new_file_path') - parser.add_option('-c', action="store", dest='inputcdffull_name') - parser.add_option('-f', action="store", dest='inputufl_name') - parser.add_option('-g', action="store", dest='inputugp_name') - parser.add_option('-a', action="store", dest='inputacs_name') - parser.add_option('-d', action="store", dest='inputcdffull') - parser.add_option('-v', action="store", dest='inputufl') - parser.add_option('-h', action="store", dest='inputugp') - parser.add_option('-b', action="store", dest='inputacs') - parser.add_option('-t', action="store", dest='tumorcsv') - parser.add_option('-y', action="store", dest='settingsType') - parser.add_option('-o', action="store", dest='outputgraph') - parser.add_option('-z', action="store", dest='zipfigures') - parser.add_option('-k', action="store", dest='outputlog') - parser.add_option('-l', action="store", dest='log') - parser.add_option('-u', action="store", dest='user_id') - - parser.add_option('-i', action="append", dest='inputFile', default=[]) - parser.add_option('-n', action='append', dest='inputFileName', default=[]) + extra_files_directory = sys.argv[1] + report = sys.argv[4] + new_files_directory = sys.argv[6] + dataset=sys.argv[7] + cdffull_name=sys.argv[9] + ufl_name=sys.argv[10] + ugp_name=sys.argv[11] + acs_name=sys.argv[12] + cdffull=sys.argv[14] + ufl=sys.argv[15] + ugp=sys.argv[16] + acs=sys.argv[17] + tumor=sys.argv[18] + settingType=sys.argv[19] + outputgraph=sys.argv[20] + zipfigures=sys.argv[21] + outputlog=sys.argv[22] + log=sys.argv[23] + user=sys.argv[24] + + extra_file_names = sorted(os.listdir(extra_files_directory)) - options, args = parser.parse_args() - outputFileName=options.outputFile - - print options.inputFile - print options.inputFileName - - dataSetName="dataset" - destinationPath=os.path.join(options.new_file_path, user, dataset) - - mpagenomics_dir = os.path.join(destinationPath,"mpagenomics",user) - data_dir = os.path.join(options.new_file_path, user) - + if (cdffull_name.count(",") != 0): + chipType=cdffull_name.split(",",1)[0] + tagExt=cdffull_name.split(",",1)[1] + tag=tagExt.split(".",1)[0] + else: + chipType=cdffull_name.split(".",1)[0] + tag="" + + data_dir = os.path.join(new_files_directory, user, dataset) + mpagenomics_dir = os.path.join(new_files_directory, "mpagenomics",user) + try: os.makedirs(data_dir) except: @@ -50,42 +49,44 @@ if (not os.path.isdir(mpagenomics_dir)): os.makedirs(mpagenomics_dir) + + for name in extra_file_names: + source = os.path.join(extra_files_directory, name) + # Strip _task_XXX from end of name + name_match = re.match(r"^\d+_task_(.*).dat$", name) + if name_match: + name = name_match.group(1) + else: + # Skip indices, composite extra_files_paths, etc... + continue + #escaped_name = name.replace("_", "-") + #dataset_name = "%s" % (name, 'visible', ext, db_key) + destination = os.path.join(data_dir, name) + _copy(source, destination) +# datasets_created.append(name) - for inputFile, inputFileName in zip(options.inputFile,options.inputFileName): - source = inputFile - destination=os.path.join(data_dir,inputFileName) - os.symlink(source,destination) - - if (cdffull_name.count(",") != 0): - chipType=cdffull_name.split(",",1)[0] - tagExt=cdffull_name.split(",",1)[1] - tag=tagExt.split(".",1)[0] - else: - chipType=cdffull_name.split(".",1)[0] - tag="" - _copy(cdffull,os.path.join(data_dir, cdffull_name)) _copy(ugp,os.path.join(data_dir, ugp_name)) _copy(ufl,os.path.join(data_dir, ufl_name)) _copy(acs,os.path.join(data_dir, acs_name)) - + fig_dir = os.path.join("mpagenomics", user, "figures", dataset, "signal") abs_fig_dir = os.path.join(new_files_directory, fig_dir) + + + retcode = _preprocess(chipType, dataset, mpagenomics_dir, data_dir, new_files_directory, tumor, settingType, outputgraph, outputlog, log, tag) - - retcode = _preprocess(chipType, dataSetName, mpagenomics_dir, data_dir, options.new_file_path, options.tumorcsv, options.settingType, options.outputgraph, options.outputlog, options.log, tag) - if (retcode == 0): if (os.path.isdir(abs_fig_dir)) and (outputgraph == "TRUE"): - + new_files = os.listdir(abs_fig_dir) zipbuf = zipfile.ZipFile(os.path.join(abs_fig_dir, zipfigures), 'w', zipfile.ZIP_DEFLATED) for current_file in new_files: fn = os.path.join(abs_fig_dir, current_file) relfn = fn[len(abs_fig_dir) + len(os.sep):] zipbuf.write(fn, relfn) - + f = open(report, "w") # Create report try: @@ -94,11 +95,11 @@ finally: shutil.rmtree(data_dir) f.close() - + sys.exit(retcode) - + sys.exit(retcode) - + def _copy(source, destination): try:
--- a/preprocess.xml Mon Apr 27 05:48:52 2015 -0400 +++ b/preprocess.xml Tue Apr 28 11:23:47 2015 +0200 @@ -1,10 +1,7 @@ -<tool id="preprocess" name="Data Normalization" force_history_refresh="True" version="0.1.0"> - <requirements> - <requirement type="set_environment">R_SCRIPT_PATH</requirement> - <requirement type="package" version="1.1.2">mpagenomics</requirement> - </requirements> +<tool id="preprocess2" name="Data Normalization" force_history_refresh="True" version="0.1.0"> + <command interpreter="python"> - preprocess.py + preprocess2.py -s '$summary' -p '$__new_file_path__' -c '$inputcdffull.name' @@ -13,8 +10,9 @@ -a '$inputacs.name' -d '$inputcdffull' -v '$inputufl' - -h '$inputugp' - -b '$inputacs' + -w '$inputugp' + -b '$inputacs' + -e 'datasetName' #if $settings.settingsType == "tumor": -t '$tumorcsv' #end if @@ -29,10 +27,11 @@ -u '$__user_id__' #for $input in $inputs -i "${input}" - -n "${input.name} + -n "${input.name}" #end for </command> <inputs> + <param name="datasetName" type="text" label="Dataset Name"/> <param name="inputs" type="data" format="cel" multiple="True" label="Cel files dataset" help="Cel files dataset previously uploaded with the Multiple File Datasets tool."/> <param name="inputcdffull" type="data" format="cdf" label="cdf file" help=".cdf file name must comply with the following format : < chiptype >,< tag >.cdf (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,Full.cdf)." /> <param name="inputufl" type="data" format="ufl" label="ufl file" help=".ufl file name must start with < chiptype >,< tag > (e.g, for a GenomeWideSNP_6 chip: GenomeWideSNP_6,Full,na31,hg19,HB20110328.ufl)."/> @@ -67,11 +66,11 @@ variable outputs require a primary dataset. If hidden refresh doesn't occur. --> - <data format="dsf" name="summary" label="Dataset summary file of ${input.name} " /> - <data format="zip" name="zipfigures" label="figures of normalization of ${input.name}"> + <data format="dsf" name="summary" label="Dataset summary file of ${datasetName}" /> + <data format="zip" name="zipfigures" label="figures of normalization of ${datasetName}"> <filter>outputgraph == "TRUE"</filter> </data> - <data format="log" name="log" label="log of normalization of ${input.name}"> + <data format="log" name="log" label="log of normalization ${datasetName}"> <filter>outputlog == "TRUE"</filter> </data> </outputs>