Mercurial > repos > azuzolo > qiime1_3_0
comparison qiime_wrapper.py @ 2:d80000f5ad20 draft
Uploaded
| author | azuzolo |
|---|---|
| date | Wed, 06 Jun 2012 16:17:36 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:2c9714f56480 | 2:d80000f5ad20 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import logging, os, string, sys, tempfile, glob, shutil, types, urllib, optparse, re | |
| 3 import shlex, subprocess | |
| 4 | |
| 5 """ | |
| 6 sys.argv | |
| 7 this --galaxy_datasets= --qiime_script | |
| 8 | |
| 9 alpha_rarefaction | |
| 10 output html | |
| 11 wf_arare/alpha_rarefaction_plots/rarefaction_plots.html | |
| 12 wf_arare/alpha_rarefaction_plots/html_plots/ | |
| 13 wf_arare/alpha_div | |
| 14 wf_arare/alpha_div/alpha_rarefaction_101_0.txt | |
| 15 | |
| 16 --galaxy_summary_html=$output_html | |
| 17 --galaxy_summary_template=$output_template | |
| 18 --galaxy_summary_links='label:link,label:link' | |
| 19 --galaxy_outputdir=$output_html.extra_files_path | |
| 20 | |
| 21 | |
| 22 """ | |
| 23 | |
def stop_err(msg):
    """Report *msg* on stderr and terminate the process with a failing status.

    msg may be any object; it is rendered with ``%s`` and followed by a
    newline.  The process exits with status 1 (raises ``SystemExit``) so that
    callers inspecting the exit code see the failure, not only callers that
    inspect stderr.
    """
    sys.stderr.write("%s\n" % msg)
    # A bare sys.exit() would report success (status 0) after an error.
    sys.exit(1)
| 27 | |
def _say(stream, text):
    """Write *text* followed by a newline to *stream* (debug output helper)."""
    stream.write('%s\n' % text)


def _ensure_dir(path):
    """Create directory *path* if it does not exist yet and return *path*."""
    if not os.path.exists(path):
        os.makedirs(path)
    return path


def _link_or_copy(src, dst):
    """Hard-link *src* to *dst*, falling back to a copy when linking fails."""
    try:
        os.link(src, dst)
    except (OSError, AttributeError):
        # Linking fails across devices or when dst already exists; copying a
        # file onto itself would raise, so an existing link to src is accepted.
        if os.path.exists(dst) and os.path.samefile(src, dst):
            return
        shutil.copy2(src, dst)


def _run_command(cmdline, tmp_dir):
    """Run *cmdline* through the shell and return ``(returncode, stderr_text)``.

    stdout and stderr are redirected to anonymous temporary files created in
    *tmp_dir* (system default when None), which is also used as the working
    directory.  stdout is captured but discarded.
    """
    with tempfile.TemporaryFile(dir=tmp_dir, suffix='.err') as err_file:
        with tempfile.TemporaryFile(dir=tmp_dir, suffix='.out') as out_file:
            # NOTE(review): shell=True on a string joined from argv loses the
            # caller's quoting and allows shell injection if any argument is
            # untrusted.  Kept because tool configurations may rely on shell
            # syntax -- confirm before switching to an argument list.
            proc = subprocess.Popen(args=cmdline, shell=True, cwd=tmp_dir,
                                    stderr=err_file, stdout=out_file)
            returncode = proc.wait()
        err_file.seek(0)
        chunks = []
        while True:
            # Stop on the first empty read; this terminates for every size,
            # including sizes that are an exact multiple of the buffer.
            chunk = err_file.read(1048576)
            if not chunk:
                break
            chunks.append(chunk)
    captured = b''.join(chunks)
    if not isinstance(captured, str):
        captured = captured.decode('utf-8', 'replace')
    return returncode, captured


def _write_log(logfile, directory):
    """Write the names of the files in *directory* to *logfile* (best effort)."""
    try:
        names = sorted(os.listdir(directory))
        with open(logfile, 'w') as log:
            log.write('Tool started. Files created by tool: \n')
            for fname in names:
                if 'DS_Store' not in fname and 'primary' not in fname:
                    log.write(fname + '\n')
            log.write('Tool Finished.')
    except (IOError, OSError):
        pass


def _purge_directory(directory):
    """Remove the files in *directory* except 'DS_Store'/'primary' entries."""
    try:
        names = os.listdir(directory)
    except OSError:
        return
    for fname in names:
        if 'DS_Store' in fname or 'primary' in fname:
            continue
        try:
            os.remove(os.path.join(directory, fname))
        except OSError:
            # Sub-directories and stale NFS handles must not stop the cleanup.
            pass


def __main__():
    """Run the wrapped command line and hand its results over to Galaxy.

    Arguments in ``sys.argv[1:]`` that start with ``--galaxy_`` configure the
    wrapper; every other argument is joined with spaces and run through the
    shell.  Recognised wrapper options:

    --galaxy_outputdir=DIR         directory scanned for results (created)
    --galaxy_inputdir=DIR          directory that receives the renamed input
    --galaxy_datasets=P:F,...      regex P -> copy matching output file to F
    --galaxy_datasetid=ID          id used in dynamic dataset file names
    --galaxy_new_datasets=P:E,...  regex P -> dynamic dataset of extension E
    --galaxy_new_files_path=DIR    directory of dynamic datasets (created)
    --galaxy_summary_html=FILE     flatten outputdir; target for the template
    --galaxy_summary_template=FILE copied to the summary html file
    --galaxy_ext_change=FILE       input linked as DIR/temporary.<new ext>
    --galaxy_new_ext=EXT           extension used for --galaxy_ext_change
    --galaxy_logfile=FILE          receives a listing of the created files
    --galaxy_debug                 print diagnostic output

    Other ``--galaxy_*`` options (e.g. ``--galaxy_summary_links``) are accepted
    and ignored.  Failures are reported through ``stop_err``; the log file is
    written and the scratch directories are cleaned in every case.
    """
    debug = False
    # --galaxy_tmpdir handling is disabled, so the command runs in the current
    # working directory and temp files go to the system default location.
    tmp_dir = None
    inputdir = None
    outputdir = None
    dataset_patterns = None
    datasetid = None
    new_dataset_patterns = None
    new_files_path = None
    summary_html = None
    summary_template = None
    logfile = None
    newext = None
    extchange = None
    cmd_args = []
    for arg in sys.argv[1:]:
        if not arg.startswith('--galaxy_'):
            cmd_args.append(arg)
            continue
        # Split on the first '=' only, so values may themselves contain '='.
        opt, sep, val = arg.partition('=')
        if not sep:
            val = None
        if opt == '--galaxy_outputdir':
            try:
                outputdir = _ensure_dir(val)
            except Exception as ex:
                stop_err(ex)
        elif opt == '--galaxy_new_files_path':
            try:
                new_files_path = _ensure_dir(val)
            except Exception as ex:
                stop_err(ex)
        elif opt == '--galaxy_datasets':
            dataset_patterns = val.split(',') if val else None
        elif opt == '--galaxy_new_datasets':
            new_dataset_patterns = val.split(',') if val else None
        elif opt == '--galaxy_datasetid':
            datasetid = val
        elif opt == '--galaxy_summary_html':
            summary_html = val
        elif opt == '--galaxy_summary_template':
            summary_template = val
        elif opt == '--galaxy_debug':
            debug = True
        elif opt == '--galaxy_logfile':
            logfile = val
        elif opt == '--galaxy_ext_change':
            extchange = val
        elif opt == '--galaxy_new_ext':
            newext = val
        elif opt == '--galaxy_inputdir':
            inputdir = val
    if debug:
        _say(sys.stdout, '\n : '.join(cmd_args))
    stderr = ''
    try:
        # Expose the input under the extension the wrapped tool requires.
        if extchange is not None and inputdir is not None and newext is not None:
            _link_or_copy(extchange, os.path.join(inputdir, 'temporary.' + newext))
        cmdline = ' '.join(cmd_args)
        if debug:
            _say(sys.stdout, cmdline)
        returncode, stderr = _run_command(cmdline, tmp_dir)
        if debug:
            _say(sys.stderr, stderr)
        if returncode != 0:
            if debug:
                _say(sys.stderr, "returncode = %d" % returncode)
            raise Exception(stderr or 'command exited with status %d' % returncode)
        # Collect results: copy files matching a pattern to their dataset path.
        if dataset_patterns is not None and outputdir is not None:
            targets = []
            for output in dataset_patterns:
                pattern, sep, path = output.partition(':')
                if debug:
                    _say(sys.stdout, '%s -> %s' % (pattern, path))
                if not sep or path == 'None':
                    continue
                targets.append((pattern, path))
            for root, dirs, files in os.walk(outputdir):
                for fname in files:
                    fpath = os.path.join(root, fname)
                    for pattern, path in targets:
                        matched = re.match(pattern, fname)
                        if debug:
                            _say(sys.stdout, 'outdir %s match: %s' % (fname, matched))
                        if not matched:
                            continue
                        try:
                            shutil.copy2(fpath, path)
                            if new_files_path is not None:
                                _link_or_copy(fpath, os.path.join(new_files_path, fname))
                        except Exception as ex:
                            stop_err('%s' % ex)
        # Flatten the directory hierarchy by one level so that Galaxy can
        # serve the href links of the summary page.
        if summary_html is not None:
            if outputdir is not None:
                for dname in os.listdir(outputdir):
                    dpath = os.path.join(outputdir, dname)
                    if os.path.isdir(dpath):
                        for fname in os.listdir(dpath):
                            shutil.move(os.path.join(dpath, fname), outputdir)
            if summary_template is not None:
                shutil.copy(summary_template, summary_html)
        # Handle the dynamically generated galaxy datasets.
        # Each item contains a regex pattern for matching file names and a
        # galaxy datatype, separated by ':'.  The first regex group is used as
        # the dataset name and must be unique for each output.
        if (new_dataset_patterns is not None and new_files_path is not None
                and datasetid is not None):
            for output in new_dataset_patterns:
                pattern, sep, ext = output.partition(':')
                if not sep:
                    # Without a datatype there is no valid dataset name.
                    continue
                for fname in os.listdir(new_files_path):
                    m = re.match(pattern, fname)
                    if not m:
                        continue
                    fpath = os.path.join(new_files_path, fname)
                    if m.groups():
                        root = m.groups()[0]
                    else:
                        # Remove the extension (galaxy adds it back later) and
                        # the characters galaxy uses as field separators.
                        root = re.sub(r'\.?' + re.escape(ext) + '$', '', fname)
                        root = root.replace('_', '').replace('.', '')
                    # File name pattern required by galaxy.
                    fn = "%s_%s_%s_%s_%s" % ('primary', datasetid, root, 'visible', ext)
                    if debug:
                        _say(sys.stdout, '> %s' % fpath)
                        _say(sys.stdout, '< %s' % os.path.join(new_files_path, fn))
                    _link_or_copy(fpath, os.path.join(new_files_path, fn))
                    # Needed for files with variable output and a directory structure.
                    if outputdir is not None:
                        _link_or_copy(fpath, os.path.join(outputdir, fname))
    except Exception as e:
        # Report the failure rather than discarding it; the finally clause
        # below still runs while SystemExit propagates.
        msg = str(e)
        if stderr and stderr not in msg:
            msg += stderr
        stop_err('Error running ' + msg)
    finally:
        # List the created files first, then clean the scratch directories.
        # Both steps are best effort and never raise.
        if logfile is not None:
            log_dir = new_files_path if new_files_path is not None else outputdir
            if log_dir is not None:
                _write_log(logfile, log_dir)
        # Directories whose path contains 'files' are kept untouched.
        for scratch in (outputdir, inputdir, new_files_path):
            if scratch is not None and 'files' not in scratch:
                _purge_directory(scratch)
| 310 | |
# Run the wrapper only when this file is executed as a script.
if __name__ == "__main__":
    __main__()
| 312 |
