diff preprocess.py @ 0:a89bae08bf2d

Uploaded
author sblanck
date Mon, 27 Apr 2015 05:48:52 -0400
parents
children 4d25dec9707e
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preprocess.py	Mon Apr 27 05:48:52 2015 -0400
@@ -0,0 +1,122 @@
+import os
+import re
+import shutil
+import sys
+import subprocess
+import zipfile
+import optparse
+
+def main():
+    
+    parser = optparse.OptionParser()
+    parser.add_option('-s', action="store", dest='summary')
+    parser.add_option('-p', action="store", dest='new_file_path')
+    parser.add_option('-c', action="store", dest='inputcdffull_name')
+    parser.add_option('-f', action="store", dest='inputufl_name')
+    parser.add_option('-g', action="store", dest='inputugp_name')
+    parser.add_option('-a', action="store", dest='inputacs_name')
+    parser.add_option('-d', action="store", dest='inputcdffull')
+    parser.add_option('-v', action="store", dest='inputufl')
+    parser.add_option('-h', action="store", dest='inputugp')
+    parser.add_option('-b', action="store", dest='inputacs')
+    parser.add_option('-t', action="store", dest='tumorcsv')
+    parser.add_option('-y', action="store", dest='settingsType')
+    parser.add_option('-o', action="store", dest='outputgraph')
+    parser.add_option('-z', action="store", dest='zipfigures')
+    parser.add_option('-k', action="store", dest='outputlog')
+    parser.add_option('-l', action="store", dest='log')
+    parser.add_option('-u', action="store", dest='user_id')
+
+    parser.add_option('-i', action="append", dest='inputFile', default=[])
+    parser.add_option('-n', action='append', dest='inputFileName', default=[])
+        
+    options, args = parser.parse_args()
+    outputFileName=options.outputFile
+    
+    print options.inputFile
+    print options.inputFileName
+    
+    dataSetName="dataset"
+    destinationPath=os.path.join(options.new_file_path, user, dataset)
+    
+    mpagenomics_dir = os.path.join(destinationPath,"mpagenomics",user)
+    data_dir = os.path.join(options.new_file_path, user)
+    
+    try:
+        os.makedirs(data_dir)
+    except:
+        shutil.rmtree(data_dir)
+        os.makedirs(data_dir)
+    
+    if (not os.path.isdir(mpagenomics_dir)):
+        os.makedirs(mpagenomics_dir)
+    
+    for inputFile, inputFileName in zip(options.inputFile,options.inputFileName):
+        source = inputFile
+        destination=os.path.join(data_dir,inputFileName)
+        os.symlink(source,destination)
+         
+    if (cdffull_name.count(",") != 0):    
+        chipType=cdffull_name.split(",",1)[0]
+        tagExt=cdffull_name.split(",",1)[1]
+        tag=tagExt.split(".",1)[0]
+    else:
+        chipType=cdffull_name.split(".",1)[0]
+        tag=""
+         
+    _copy(cdffull,os.path.join(data_dir, cdffull_name))
+    _copy(ugp,os.path.join(data_dir, ugp_name))
+    _copy(ufl,os.path.join(data_dir, ufl_name))
+    _copy(acs,os.path.join(data_dir, acs_name))
+     
+    
+    fig_dir = os.path.join("mpagenomics", user, "figures", dataset, "signal")
+    abs_fig_dir = os.path.join(new_files_directory, fig_dir)
+        
+     
+    retcode = _preprocess(chipType, dataSetName, mpagenomics_dir, data_dir, options.new_file_path, options.tumorcsv, options.settingType, options.outputgraph, options.outputlog, options.log, tag)
+         
+    if (retcode == 0):
+        if (os.path.isdir(abs_fig_dir)) and (outputgraph == "TRUE"):
+         
+            new_files = os.listdir(abs_fig_dir)
+            zipbuf = zipfile.ZipFile(os.path.join(abs_fig_dir, zipfigures), 'w', zipfile.ZIP_DEFLATED)
+            for current_file in new_files:
+                fn = os.path.join(abs_fig_dir, current_file)
+                relfn = fn[len(abs_fig_dir) + len(os.sep):]
+                zipbuf.write(fn, relfn)
+         
+        f = open(report, "w")   
+        # Create report
+        try:
+            for name in extra_file_names:
+                f.write("%s\t%s\t%s\n" %(re.match(r"^\d+_task_(.*).dat$", name).group(1),dataset,chipType))
+        finally:
+            shutil.rmtree(data_dir)
+            f.close()
+         
+        sys.exit(retcode)
+         
+    sys.exit(retcode)
+     
+    
+def _copy(source, destination):
+    try:
+        os.link(source, destination)
+    except:
+        shutil.copy(source, destination)
+
+def _preprocess (chipType,dataset,mpagenomics_dir,data_dir,tmp_dir,tumor,settingType,outputgraph,outputlog,log,tag):
+    script_dir=os.path.dirname(os.path.abspath(__file__))
+    
+    if (outputlog=="TRUE"):
+        errfile=open(log,'w')
+    else:
+        errfile=open(os.path.join(tmp_dir,"errfile.log"),'w')
+    
+    retcode = subprocess.call(["Rscript", os.path.join(script_dir,"preprocess.R"), chipType, dataset, mpagenomics_dir, data_dir, tumor, settingType, outputgraph, tag], stdout = errfile, stderr = errfile)
+    return(retcode)
+
+ 
+# Run the tool only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()