view mpagenomics_normalize-7dc6ce39fb89/preprocess.py @ 0:84b13b0e2b85

Uploaded
author sblanck
date Thu, 07 May 2015 08:22:36 -0400
parents
children
line wrap: on
line source

import os
import re
import shutil
import sys
import subprocess
import zipfile
import optparse

def _split_cdf_name(cdffull_name):
    """Split a CDF file name into ``(chipType, tag)``.

    A name containing a comma, ``<chipType>,<tag>.<ext>``, yields the text
    before the first comma as the chip type and the text between that comma
    and the following dot as the tag.  A name without a comma yields the text
    before the first dot as the chip type and an empty tag.
    """
    if "," in cdffull_name:
        chip_type, _, tag_ext = cdffull_name.partition(",")
        return chip_type, tag_ext.split(".", 1)[0]
    return cdffull_name.split(".", 1)[0], ""


def _zip_figures(fig_dir, zip_name):
    """Archive every entry found in *fig_dir* into the zip file *zip_name*.

    *zip_name* is joined onto *fig_dir*, so an absolute *zip_name* is used
    as-is.  Entries are stored under their bare file name.
    """
    # List the directory before creating the archive so that the archive can
    # never end up listing (and trying to contain) itself.
    entries = os.listdir(fig_dir)
    # The context manager guarantees the archive is finalised (central
    # directory written) even if adding one of the entries fails.
    with zipfile.ZipFile(os.path.join(fig_dir, zip_name), 'w', zipfile.ZIP_DEFLATED) as archive:
        for entry in entries:
            archive.write(os.path.join(fig_dir, entry), entry)


def main():
    """Stage the input files, run the R preprocessing script and write outputs.

    Command-line options (all read through ``optparse``):

    * ``-p`` base working directory, ``-u`` user id, ``-e`` data set name;
    * ``-i`` / ``-n`` (repeatable) input file paths and the names they are
      staged under;
    * ``-d``/``-c``, ``-w``/``-g``, ``-v``/``-f``, ``-b``/``-a`` path/name
      pairs of the cdf, ugp, ufl and acs annotation files;
    * ``-t`` and ``-y`` values forwarded unchanged to the R script;
    * ``-o`` ``"TRUE"`` to zip the generated figures into ``-z``;
    * ``-k`` ``"TRUE"`` to send the R output to the ``-l`` log file;
    * ``-s`` path of the tab-separated summary report.

    The process exits with the return code of the R script.  The summary is
    only written, and the staging directory only removed, when that return
    code is 0.
    """
    parser = optparse.OptionParser()
    parser.add_option('-s', action="store", dest='summary')
    parser.add_option('-e', action="store", dest='dataSetName')
    parser.add_option('-p', action="store", dest='new_file_path')
    parser.add_option('-c', action="store", dest='inputcdffull_name')
    parser.add_option('-f', action="store", dest='inputufl_name')
    parser.add_option('-g', action="store", dest='inputugp_name')
    parser.add_option('-a', action="store", dest='inputacs_name')
    parser.add_option('-d', action="store", dest='inputcdffull')
    parser.add_option('-v', action="store", dest='inputufl')
    parser.add_option('-w', action="store", dest='inputugp')
    parser.add_option('-b', action="store", dest='inputacs')
    parser.add_option('-t', action="store", dest='tumorcsv')
    parser.add_option('-y', action="store", dest='settingsType')
    parser.add_option('-o', action="store", dest='outputgraph')
    parser.add_option('-z', action="store", dest='zipfigures')
    parser.add_option('-k', action="store", dest='outputlog')
    parser.add_option('-l', action="store", dest='log')
    parser.add_option('-u', action="store", dest='user_id')

    parser.add_option('-i', action="append", dest='inputFile', default=[])
    parser.add_option('-n', action='append', dest='inputFileName', default=[])

    options, _ = parser.parse_args()

    dataSetName = options.dataSetName
    mpagenomics_dir = os.path.join(options.new_file_path, "mpagenomics", options.user_id)
    data_dir = os.path.join(options.new_file_path, options.user_id)

    # Always start from an empty staging directory: wipe a leftover one from
    # a previous run, then create it.  Any other failure (e.g. permissions)
    # surfaces as the genuine makedirs error.
    if os.path.isdir(data_dir):
        shutil.rmtree(data_dir)
    os.makedirs(data_dir)

    if not os.path.isdir(mpagenomics_dir):
        os.makedirs(mpagenomics_dir)

    # Stage each input file under the name supplied for it.
    for inputFile, inputFileName in zip(options.inputFile, options.inputFileName):
        _copy(inputFile, os.path.join(data_dir, inputFileName))

    chipType, tag = _split_cdf_name(options.inputcdffull_name)

    # Stage the four annotation files next to the input files.
    _copy(options.inputcdffull, os.path.join(data_dir, options.inputcdffull_name))
    _copy(options.inputugp, os.path.join(data_dir, options.inputugp_name))
    _copy(options.inputufl, os.path.join(data_dir, options.inputufl_name))
    _copy(options.inputacs, os.path.join(data_dir, options.inputacs_name))

    # Directory whose content is zipped below when it exists; presumably it
    # is populated by preprocess.R -- verify against the R script.
    fig_dir = os.path.join("mpagenomics", options.user_id, "figures", dataSetName, "signal")
    abs_fig_dir = os.path.join(options.new_file_path, fig_dir)

    retcode = _preprocess(chipType, dataSetName, mpagenomics_dir, data_dir, options.new_file_path, options.tumorcsv, options.settingsType, options.outputgraph, options.outputlog, options.log, tag)

    if retcode == 0:
        if os.path.isdir(abs_fig_dir) and options.outputgraph == "TRUE":
            _zip_figures(abs_fig_dir, options.zipfigures)

        # Write the report (one line per input file); the staging directory
        # is removed whether or not the report could be written.
        try:
            with open(options.summary, "w") as summary:
                for inputFileName in options.inputFileName:
                    summary.write("%s\t%s\t%s\n" % (inputFileName, dataSetName, chipType))
        finally:
            shutil.rmtree(data_dir)

    sys.exit(retcode)
     
    
def _copy(source, destination):
    """Make *source* available at *destination*, preferring a symbolic link.

    A symlink is attempted first; if it cannot be created, the file is
    copied with ``shutil.copy`` instead.
    """
    try:
        os.symlink(source, destination)
    except (OSError, AttributeError, NotImplementedError):
        # OSError: the link could not be created (destination exists,
        # unsupported filesystem, missing privilege, ...).
        # AttributeError / NotImplementedError: os.symlink is unavailable on
        # this platform.  Anything else (e.g. KeyboardInterrupt) propagates.
        shutil.copy(source, destination)

def _preprocess (chipType,dataset,mpagenomics_dir,data_dir,tmp_dir,tumor,settingType,outputgraph,outputlog,log,tag):
    """Run ``preprocess.R`` through ``Rscript`` and return its exit code.

    The R script is looked up in the directory containing this file and is
    given, in order: chipType, dataset, mpagenomics_dir, data_dir, tumor,
    settingType, outputgraph and tag.

    Both stdout and stderr of the R process are written to *log* when
    *outputlog* equals ``"TRUE"``, otherwise to ``errfile.log`` inside
    *tmp_dir*.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))

    if outputlog == "TRUE":
        log_path = log
    else:
        log_path = os.path.join(tmp_dir, "errfile.log")

    command = ["Rscript", os.path.join(script_dir, "preprocess.R"), chipType, dataset, mpagenomics_dir, data_dir, tumor, settingType, outputgraph, tag]

    # The context manager closes (and flushes) the log file once the R
    # process has finished, including when launching it fails.
    with open(log_path, 'w') as errfile:
        return subprocess.call(command, stdout=errfile, stderr=errfile)

 
# Run the wrapper only when this file is executed as a script, not when it
# is imported; main() terminates the process through sys.exit().
if __name__ == "__main__":
    main()