picard_wrapper.py @ 135:1790970e47fe (draft)
Uploaded
| field | value |
|---|---|
| author | devteam |
| date | Wed, 26 Feb 2014 17:29:45 -0500 |
| parents | |
| children | b2ca2d181fb4 |
comparison: 134:522c7184b0f2 vs 135:1790970e47fe
#!/usr/bin/env python
"""
Originally written by Kelly Vincent
pretty output and additional picard wrappers by Ross Lazarus for rgenetics
Runs all available wrapped Picard tools.
usage: picard_wrapper.py [options]
the code Ross wrote is licensed under the LGPL
see http://www.gnu.org/copyleft/lesser.html
"""

import optparse, os, sys, subprocess, tempfile, shutil, time, logging

galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="Galaxy %s tool output - see http://getgalaxy.org/" />
<title></title>
<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
</head>
<body>
<div class="document">
"""
galhtmlattr = """<b>Galaxy tool %s run at %s</b><br/>"""
galhtmlpostfix = """</div></body></html>\n"""


def stop_err( msg ):
    sys.stderr.write( '%s\n' % msg )
    sys.exit()


def timenow():
    """return current time as a string
    """
    return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))


class PicardBase():
    """
    simple base class with some utilities for Picard
    adapted and merged with Kelly Vincent's code April 2011 Ross
    lots of changes...
    """

    def __init__(self, opts=None,arg0=None):
        """ common stuff needed at init for a picard tool
        """
        assert opts != None, 'PicardBase needs opts at init'
        self.opts = opts
        if self.opts.outdir == None:
            self.opts.outdir = os.getcwd() # some tools (eg fixmate) pass no html output dir, so use the current dir
        assert self.opts.outdir != None, '## PicardBase needs a temp directory if no output directory passed in'
        self.picname = self.baseName(opts.jar)
        if self.picname.startswith('picard'):
            self.picname = opts.picard_cmd # special case for some tools like replaceheader?
        self.progname = self.baseName(arg0)
        self.version = '0.002'
        self.delme = [] # list of files to destroy
        self.title = opts.title
        self.inputfile = opts.input
        try:
            os.makedirs(opts.outdir)
        except:
            pass
        try:
            os.makedirs(opts.tmpdir)
        except:
            pass
        self.log_filename = os.path.join(self.opts.outdir,'%s.log' % self.picname)
        self.metricsOut = os.path.join(opts.outdir,'%s.metrics.txt' % self.picname)
        self.setLogging(logfname=self.log_filename)

    def baseName(self,name=None):
        return os.path.splitext(os.path.basename(name))[0]

    def setLogging(self,logfname="picard_wrapper.log"):
        """setup a logger
        """
        logging.basicConfig(level=logging.INFO,
                            filename=logfname,
                            filemode='a')


    def readLarge(self,fname=None):
        """ read a potentially huge file.
        """
        try:
            # get stderr, allowing for case where it's very large
            tmp = open( fname, 'rb' )
            s = ''
            buffsize = 1048576
            try:
                while True:
                    more = tmp.read( buffsize )
                    if len(more) > 0:
                        s += more
                    else:
                        break
            except OverflowError:
                pass
            tmp.close()
        except Exception, e:
            stop_err( 'Read Large Exception : %s' % str( e ) )
        return s

    def runCL(self,cl=None,output_dir=None):
        """ construct and run a command line
        we have galaxy's temp path as opt.temp_dir so don't really need isolation
        sometimes stdout is needed as the output - ugly hacks to deal with potentially vast artifacts
        """
        assert cl != None, 'PicardBase runCL needs a command line as cl'
        if output_dir == None:
            output_dir = self.opts.outdir
        if type(cl) == type([]):
            cl = ' '.join(cl)
        fd,templog = tempfile.mkstemp(dir=output_dir,suffix='rgtempRun.txt')
        tlf = open(templog,'wb')
        fd,temperr = tempfile.mkstemp(dir=output_dir,suffix='rgtempErr.txt')
        tef = open(temperr,'wb')
        process = subprocess.Popen(cl, shell=True, stderr=tef, stdout=tlf, cwd=output_dir)
        rval = process.wait()
        tlf.close()
        tef.close()
        stderrs = self.readLarge(temperr)
        stdouts = self.readLarge(templog)
        if rval > 0:
            s = '## executing %s returned status %d and stderr: \n%s\n' % (cl,rval,stderrs)
            stdouts = '%s\n%s' % (stdouts,stderrs)
        else:
            s = '## executing %s returned status %d and nothing on stderr\n' % (cl,rval)
        logging.info(s)
        os.unlink(templog) # always
        os.unlink(temperr) # always
        return s, stdouts, rval # sometimes s is an output

    def runPic(self, jar, cl):
        """
        cl should be everything after the jar file name in the command
        """
        runme = ['java -Xmx%s' % self.opts.maxjheap]
        runme.append(" -Djava.io.tmpdir='%s' " % self.opts.tmpdir)
        runme.append('-jar %s' % jar)
        runme += cl
        s,stdouts,rval = self.runCL(cl=runme, output_dir=self.opts.outdir)
        return stdouts,rval

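    # What runPic builds: runCL joins the list into one shell command, so with the
    # default -x/--maxjheap of 3000m and a hypothetical jar path the executed line
    # looks roughly like
    #
    #   java -Xmx3000m -Djava.io.tmpdir='/tmp' -jar /path/to/MarkDuplicates.jar \
    #       VALIDATION_STRINGENCY=LENIENT INPUT=in.bam OUTPUT=out.bam ...
    #
    # (illustrative only; the KEY=VALUE arguments come from the per-tool branches
    # in __main__ below)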
    def samToBam(self,infile=None,outdir=None):
        """
        use samtools view to convert sam to bam
        """
        fd,tempbam = tempfile.mkstemp(dir=outdir,suffix='rgutilsTemp.bam')
        cl = ['samtools view -h -b -S -o ',tempbam,infile]
        tlog,stdouts,rval = self.runCL(cl,outdir)
        return tlog,tempbam,rval

    def sortSam(self, infile=None,outfile=None,outdir=None):
        """coordinate sort infile with samtools sort
        """
        print '## sortSam got infile=%s,outfile=%s,outdir=%s' % (infile,outfile,outdir)
        cl = ['samtools sort',infile,outfile]
        tlog,stdouts,rval = self.runCL(cl,outdir)
        return tlog

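    # Note on sortSam above: with the samtools 0.1.x releases this wrapper targets,
    # "samtools sort <in> <out>" treats <out> as a prefix and writes <out>.bam,
    # e.g. (hypothetical paths)
    #
    #   samtools sort /tmp/rgutilsTemp.bam rgValidate.sorted   # -> rgValidate.sorted.bam
    #
    # which is why callers below append '.bam' to the name they passed as outfile.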
    def cleanup(self):
        for fname in self.delme:
            try:
                os.unlink(fname)
            except:
                pass

    def prettyPicout(self,transpose,maxrows):
        """organize picard outputs into a report html page
        """
        res = []
        try:
            r = open(self.metricsOut,'r').readlines()
        except:
            r = []
        if len(r) > 0:
            res.append('<b>Picard online resources</b><ul>\n')
            res.append('<li><a href="http://picard.sourceforge.net/index.shtml">Click here for Picard Documentation</a></li>\n')
            res.append('<li><a href="http://picard.sourceforge.net/picard-metric-definitions.shtml">Click here for Picard Metrics definitions</a></li></ul><hr/>\n')
            if transpose:
                res.append('<b>Picard output (transposed to make it easier to see)</b><hr/>\n')
            else:
                res.append('<b>Picard output</b><hr/>\n')
            res.append('<table cellpadding="3" >\n')
            dat = []
            heads = []
            lastr = len(r) - 1
            # special case for estimate library complexity hist
            thist = False
            for i,row in enumerate(r):
                if row.strip() > '':
                    srow = row.split('\t')
                    if row.startswith('#'):
                        heads.append(row.strip()) # want strings
                    else:
                        dat.append(srow) # want lists
                    if row.startswith('## HISTOGRAM'):
                        thist = True
            if len(heads) > 0:
                hres = ['<tr class="d%d"><td colspan="2">%s</td></tr>' % (i % 2,x) for i,x in enumerate(heads)]
                res += hres
                heads = []
            if len(dat) > 0:
                if transpose and not thist:
                    tdat = map(None,*dat) # transpose an arbitrary list of lists
                    tdat = ['<tr class="d%d"><td>%s</td><td>%s </td></tr>\n' % ((i+len(heads)) % 2,x[0],x[1]) for i,x in enumerate(tdat)]
                else:
                    tdat = ['\t'.join(x).strip() for x in dat] # back to strings :(
                    tdat = ['<tr class="d%d"><td colspan="2">%s</td></tr>\n' % ((i+len(heads)) % 2,x) for i,x in enumerate(tdat)]
                res += tdat
                dat = []
            res.append('</table>\n')
        return res

    def fixPicardOutputs(self,transpose,maxloglines):
        """
        picard produces long hard to read tab header files
        make them available but present them transposed for readability
        """
        logging.shutdown()
        self.cleanup() # remove temp files stored in delme
        rstyle="""<style type="text/css">
        tr.d0 td {background-color: oldlace; color: black;}
        tr.d1 td {background-color: aliceblue; color: black;}
        </style>"""
        res = [rstyle,]
        res.append(galhtmlprefix % self.progname)
        res.append(galhtmlattr % (self.picname,timenow()))
        flist = [x for x in os.listdir(self.opts.outdir) if not x.startswith('.')]
        pdflist = [x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf']
        if len(pdflist) > 0: # assumes all pdfs come with thumbnail .jpgs
            for p in pdflist:
                pbase = os.path.splitext(p)[0] # removes .pdf
                imghref = '%s.jpg' % pbase
                mimghref = '%s-0.jpg' % pbase # multiple pages pdf -> multiple thumbnails without asking!
                if mimghref in flist:
                    imghref = mimghref # only one for thumbnail...it's a multi page pdf
                res.append('<table cellpadding="10"><tr><td>\n')
                res.append('<a href="%s"><img src="%s" title="Click image preview for a print quality PDF version" hspace="10" align="middle"></a>\n' % (p,imghref))
                res.append('</td></tr></table>\n')
        if len(flist) > 0:
            res.append('<b>The following output files were created (click the filename to view/download a copy):</b><hr/>')
            res.append('<table>\n')
            for i,f in enumerate(flist):
                fn = os.path.split(f)[-1]
                res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,fn))
            res.append('</table><p/>\n')
        pres = self.prettyPicout(transpose,maxloglines)
        if len(pres) > 0:
            res += pres
        l = open(self.log_filename,'r').readlines()
        llen = len(l)
        if llen > 0:
            res.append('<b>Picard Tool Run Log</b><hr/>\n')
            rlog = ['<pre>',]
            if llen > maxloglines:
                n = min(50,int(maxloglines/2))
                rlog += l[:n]
                rlog.append('------------ ## %d rows deleted ## --------------\n' % (llen-maxloglines))
                rlog += l[-n:]
            else:
                rlog += l
            rlog.append('</pre>')
            if llen > maxloglines:
                rlog.append('\n<b>## WARNING - %d log lines truncated - <a href="%s">%s</a> contains entire output</b>' % (llen - maxloglines,self.log_filename,self.log_filename))
            res += rlog
        else:
            res.append("### Odd, Picard left no log file %s - must have really barfed badly?\n" % self.log_filename)
        res.append('<hr/>The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> \n')
        res.append( 'generated all outputs reported here running as a <a href="http://getgalaxy.org">Galaxy</a> tool')
        res.append(galhtmlpostfix)
        outf = open(self.opts.htmlout,'w')
        outf.write(''.join(res))
        outf.write('\n')
        outf.close()

    def makePicInterval(self,inbed=None,outf=None):
        """
        picard wants bait and target files to have the same header length as the incoming bam/sam
        a meaningful (ie accurate) representation will fail because of this - so this hack
        it would be far better to be able to supply the original bed untouched
        Additional checking added Ross Lazarus Dec 2011 to deal with two 'bug' reports on the list
        """
        assert inbed != None
        bed = open(inbed,'r').readlines()
        sbed = [x.split('\t') for x in bed] # lengths MUST be 5
        lens = [len(x) for x in sbed]
        strands = [x[3] for x in sbed if not x[3] in ['+','-']]
        maxl = max(lens)
        minl = min(lens)
        e = []
        if maxl != minl:
            e.append("## Input error: Inconsistent field count in %s - please read the documentation on bait/target format requirements, fix and try again" % inbed)
        if maxl != 5:
            e.append("## Input error: %d fields found in %s, 5 required - please read the warning and documentation on bait/target format requirements, fix and try again" % (maxl,inbed))
        if len(strands) > 0:
            e.append("## Input error: Fourth column in %s is not the required strand (+ or -) - please read the warning and documentation on bait/target format requirements, fix and try again" % (inbed))
        if len(e) > 0: # write to stderr and quit
            print >> sys.stderr, '\n'.join(e)
            sys.exit(1)
        thead = os.path.join(self.opts.outdir,'tempSamHead.txt')
        if self.opts.datatype == 'sam':
            cl = ['samtools view -H -S',self.opts.input,'>',thead]
        else:
            cl = ['samtools view -H',self.opts.input,'>',thead]
        self.runCL(cl=cl,output_dir=self.opts.outdir)
        head = open(thead,'r').readlines()
        s = '## got %d rows of header\n' % (len(head))
        logging.info(s)
        o = open(outf,'w')
        o.write(''.join(head))
        o.write(''.join(bed))
        o.close()
        return outf

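    # Expected bait/target input for makePicInterval above - an assumption based on
    # the field checks in that method, which mirror the body of Picard's interval_list
    # format: five tab-separated columns with the strand in column 4, e.g.
    #
    #   chr1    15903   18714   +   bait_1
    #
    # The method prepends the SAM/BAM header of the input, so the file handed to
    # CalculateHsMetrics ends up looking like (illustrative values):
    #
    #   @SQ SN:chr1 LN:249250621
    #   chr1    15903   18714   +   bait_1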
    def cleanSam(self, insam=None, newsam=None, picardErrors=[],outformat=None):
        """
        interesting problem - if paired, the mate of each error read must be removed too,
        or cleaning creates a new set of errors - missing mate pairs!
        Do the work of removing all the error sequences
        pysam is cool
        infile = pysam.Samfile( "-", "r" )
        outfile = pysam.Samfile( "-", "w", template = infile )
        for s in infile: outfile.write(s)

        errors from ValidateSamFile.jar look like
        WARNING: Record 32, Read name SRR006041.1202260, NM tag (nucleotide differences) is missing
        ERROR: Record 33, Read name SRR006041.1042721, Empty sequence dictionary.
        ERROR: Record 33, Read name SRR006041.1042721, RG ID on SAMRecord not found in header: SRR006041

        """
        import pysam # imported here so pysam is only required when this method is actually called
        assert os.path.isfile(insam), 'rgPicardValidate cleansam needs an input sam file - cannot find %s' % insam
        assert newsam != None, 'rgPicardValidate cleansam needs an output new sam file path'
        removeNames = [x.split(',')[1].replace(' Read name ','') for x in picardErrors if len(x.split(',')) > 2]
        remDict = dict(zip(removeNames,range(len(removeNames))))
        infile = pysam.Samfile(insam,'rb')
        info = 'found %d error sequences in picardErrors, %d unique' % (len(removeNames),len(remDict))
        if len(removeNames) > 0:
            outfile = pysam.Samfile(newsam,'wb',template=infile) # template must be an open file
            i = 0
            j = 0
            for row in infile:
                dropme = remDict.get(row.qname,None) # keep if None
                if dropme is None: # 'is None' so the first error read (index 0) is dropped too
                    outfile.write(row)
                    j += 1
                else: # discard
                    i += 1
            info = '%s\n%s' % (info, 'Discarded %d lines writing %d to %s from %s' % (i,j,newsam,insam))
            outfile.close()
            infile.close()
        else: # we really want a no-op or a simple pointer copy
            infile.close()
            if newsam:
                shutil.copy(insam,newsam)
        logging.info(info)



def __main__():
    doFix = False # tools returning htmlfile don't need this
    doTranspose = True # default
    maxloglines = 100 # default
    # Parse Command Line
    op = optparse.OptionParser()
    # All tools
    op.add_option('-i', '--input', dest='input', help='Input SAM or BAM file' )
    op.add_option('-e', '--inputext', default=None)
    op.add_option('-o', '--output', default=None)
    op.add_option('-n', '--title', default="Pick a Picard Tool")
    op.add_option('-t', '--htmlout', default=None)
    op.add_option('-d', '--outdir', default=None)
    op.add_option('-x', '--maxjheap', default='3000m')
    op.add_option('-b', '--bisulphite', default='false')
    op.add_option('-s', '--sortorder', default='query')
    op.add_option('','--tmpdir', default='/tmp')
    op.add_option('-j','--jar',default='')
    op.add_option('','--picard-cmd',default=None)
    # Many tools
    op.add_option( '', '--output-format', dest='output_format', help='Output format' )
    op.add_option( '', '--bai-file', dest='bai_file', help='The path to the index file for the input bam file' )
    op.add_option( '', '--ref', dest='ref', help='Built-in reference with fasta and dict file', default=None )
    # CreateSequenceDictionary
    op.add_option( '', '--ref-file', dest='ref_file', help='Fasta to use as reference', default=None )
    op.add_option( '', '--species-name', dest='species_name', help='Species name to use in creating dict file from fasta file' )
    op.add_option( '', '--build-name', dest='build_name', help='Name of genome assembly to use in creating dict file from fasta file' )
    op.add_option( '', '--trunc-names', dest='trunc_names', help='Truncate sequence names at first whitespace from fasta file' )
    # MarkDuplicates
    op.add_option( '', '--remdups', default='true', help='Remove duplicates from output file' )
    op.add_option( '', '--optdupdist', default="100", help='Maximum pixels between two identical sequences in order to consider them optical duplicates.' )
    # CollectInsertSizeMetrics
    op.add_option('', '--taillimit', default="0")
    op.add_option('', '--histwidth', default="0")
    op.add_option('', '--minpct', default="0.01")
    op.add_option('', '--malevel', default='')
    op.add_option('', '--deviations', default="0.0")
    # CollectAlignmentSummaryMetrics
    op.add_option('', '--maxinsert', default="20")
    op.add_option('', '--adaptors', default='')
    # FixMateInformation and validate
    # CollectGcBiasMetrics
    op.add_option('', '--windowsize', default='100')
    op.add_option('', '--mingenomefrac', default='0.00001')
    # AddOrReplaceReadGroups
    op.add_option( '', '--rg-opts', dest='rg_opts', help='Specify extra (optional) arguments with full, otherwise preSet' )
    op.add_option( '', '--rg-lb', dest='rg_library', help='Read Group Library' )
    op.add_option( '', '--rg-pl', dest='rg_platform', help='Read Group platform (e.g. illumina, solid)' )
    op.add_option( '', '--rg-pu', dest='rg_plat_unit', help='Read Group platform unit (eg. run barcode) ' )
    op.add_option( '', '--rg-sm', dest='rg_sample', help='Read Group sample name' )
    op.add_option( '', '--rg-id', dest='rg_id', help='Read Group ID' )
    op.add_option( '', '--rg-cn', dest='rg_seq_center', help='Read Group sequencing center name' )
    op.add_option( '', '--rg-ds', dest='rg_desc', help='Read Group description' )
    # ReorderSam
    op.add_option( '', '--allow-inc-dict-concord', dest='allow_inc_dict_concord', help='Allow incomplete dict concordance' )
    op.add_option( '', '--allow-contig-len-discord', dest='allow_contig_len_discord', help='Allow contig length discordance' )
    # ReplaceSamHeader
    op.add_option( '', '--header-file', dest='header_file', help='sam or bam file from which header will be read' )

    op.add_option('','--assumesorted', default='true')
    op.add_option('','--readregex', default="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*")
    # EstimateLibraryComplexity
    op.add_option('','--minid', default="5")
    op.add_option('','--maxdiff', default="0.03")
    op.add_option('','--minmeanq', default="20")
    # CalculateHsMetrics
    op.add_option('','--baitbed', default=None)
    op.add_option('','--targetbed', default=None)
    # ValidateSamFile
    op.add_option('','--ignoreflags', action='append', type="string")
    op.add_option('','--maxerrors', default=None)
    op.add_option('','--datatype', default=None)
    op.add_option('','--bamout', default=None)
    op.add_option('','--samout', default=None)
    # DownsampleSam
    op.add_option('','--probability', default="1")
    op.add_option('','--seed', default="1")
    # MeanQualityByCycle
    op.add_option('','--pfreadsonly', default='false')
    op.add_option('','--alignedreadsonly', default='false')
    # CollectRnaSeqMetrics
    op.add_option('','--ignoreseq', action='append', type="string")
    op.add_option('','--minlength', default=None)
    op.add_option('','--refflat', default=None)
    op.add_option('','--strandspecificity', default=None)
    op.add_option('','--ribosomalintervals', default='null')
    op.add_option('','--rrnafragmentpercentage', default=None)

    opts, args = op.parse_args()
    opts.sortme = opts.assumesorted == 'false'
    assert opts.input != None
    # need to add
    # instance that does all the work
    pic = PicardBase(opts,sys.argv[0])

    tmp_dir = opts.outdir
    haveTempout = False # we use this where sam output is an option
    rval = 0
    stdouts = 'Not run yet'
    # set ref and dict files to use (create if necessary)
    ref_file_name = opts.ref
    if opts.ref_file != None:
        csd = 'CreateSequenceDictionary'
        realjarpath = os.path.split(opts.jar)[0]
        jarpath = os.path.join(realjarpath,'%s.jar' % csd) # for refseq
        tmp_ref_fd, tmp_ref_name = tempfile.mkstemp( dir=opts.tmpdir , prefix = pic.picname)
        ref_file_name = '%s.fasta' % tmp_ref_name
        # build dict
        dict_file_name = '%s.dict' % tmp_ref_name
        os.symlink( opts.ref_file, ref_file_name )
        cl = ['REFERENCE=%s' % ref_file_name]
        cl.append('OUTPUT=%s' % dict_file_name)
        cl.append('URI=%s' % os.path.basename( opts.ref_file ))
        cl.append('TRUNCATE_NAMES_AT_WHITESPACE=%s' % opts.trunc_names)
        if opts.species_name:
            cl.append('SPECIES=%s' % opts.species_name)
        if opts.build_name:
            cl.append('GENOME_ASSEMBLY=%s' % opts.build_name)
        pic.delme.append(dict_file_name)
        pic.delme.append(ref_file_name)
        pic.delme.append(tmp_ref_name)
        stdouts,rval = pic.runPic(jarpath, cl)
    # run relevant command(s)

    # define temporary output
    # if output is sam, it must have that extension, otherwise bam will be produced
    # specify sam or bam file with extension
    if opts.output_format == 'sam':
        suff = '.sam'
    else:
        suff = ''
    tmp_fd, tempout = tempfile.mkstemp( dir=opts.tmpdir, suffix=suff )

    cl = ['VALIDATION_STRINGENCY=LENIENT',]

    if pic.picname == 'AddOrReplaceReadGroups':
        # sort order to match Galaxy's default
        cl.append('SORT_ORDER=coordinate')
        # input
        cl.append('INPUT=%s' % opts.input)
        # outputs
        cl.append('OUTPUT=%s' % tempout)
        # required read groups
        cl.append('RGLB="%s"' % opts.rg_library)
        cl.append('RGPL="%s"' % opts.rg_platform)
        cl.append('RGPU="%s"' % opts.rg_plat_unit)
        cl.append('RGSM="%s"' % opts.rg_sample)
        if opts.rg_id:
            cl.append('RGID="%s"' % opts.rg_id)
        # optional read groups
        if opts.rg_seq_center:
            cl.append('RGCN="%s"' % opts.rg_seq_center)
        if opts.rg_desc:
            cl.append('RGDS="%s"' % opts.rg_desc)
        stdouts,rval = pic.runPic(opts.jar, cl)
        haveTempout = True

    elif pic.picname == 'BamIndexStats':
        tmp_fd, tmp_name = tempfile.mkstemp( dir=tmp_dir )
        tmp_bam_name = '%s.bam' % tmp_name
        tmp_bai_name = '%s.bai' % tmp_bam_name
        os.symlink( opts.input, tmp_bam_name )
        os.symlink( opts.bai_file, tmp_bai_name )
        cl.append('INPUT=%s' % ( tmp_bam_name ))
        pic.delme.append(tmp_bam_name)
        pic.delme.append(tmp_bai_name)
        pic.delme.append(tmp_name)
        stdouts,rval = pic.runPic( opts.jar, cl )
        f = open(pic.metricsOut,'a')
        f.write(stdouts) # got this on stdout from runCl
        f.write('\n')
        f.close()
        doTranspose = False # but not transposed

    elif pic.picname == 'EstimateLibraryComplexity':
        cl.append('I=%s' % opts.input)
        cl.append('O=%s' % pic.metricsOut)
        if float(opts.minid) > 0:
            cl.append('MIN_IDENTICAL_BASES=%s' % opts.minid)
        if float(opts.maxdiff) > 0.0:
            cl.append('MAX_DIFF_RATE=%s' % opts.maxdiff)
        if float(opts.minmeanq) > 0:
            cl.append('MIN_MEAN_QUALITY=%s' % opts.minmeanq)
        if opts.readregex > '':
            cl.append('READ_NAME_REGEX="%s"' % opts.readregex)
        if float(opts.optdupdist) > 0:
            cl.append('OPTICAL_DUPLICATE_PIXEL_DISTANCE=%s' % opts.optdupdist)
        stdouts,rval = pic.runPic(opts.jar, cl)

    elif pic.picname == 'CollectAlignmentSummaryMetrics':
        # Why do we do this fakefasta thing?
        # Because we need NO fai to be available or picard barfs unless it matches the input data.
        # Why? Dunno. Seems to work without complaining if the .bai file is AWOL....
        fakefasta = os.path.join(opts.outdir,'%s_fake.fasta' % os.path.basename(ref_file_name))
        try:
            os.symlink(ref_file_name,fakefasta)
        except:
            s = '## unable to symlink %s to %s - different devices? Will shutil.copy' % (ref_file_name,fakefasta)
            info = s
            shutil.copy(ref_file_name,fakefasta)
        pic.delme.append(fakefasta)
        cl.append('ASSUME_SORTED=true')
        adaptlist = opts.adaptors.split(',')
        adaptorseqs = ['ADAPTER_SEQUENCE=%s' % x for x in adaptlist]
        cl += adaptorseqs
        cl.append('IS_BISULFITE_SEQUENCED=%s' % opts.bisulphite)
        cl.append('MAX_INSERT_SIZE=%s' % opts.maxinsert)
        cl.append('OUTPUT=%s' % pic.metricsOut)
        cl.append('R=%s' % fakefasta)
        cl.append('TMP_DIR=%s' % opts.tmpdir)
        if not opts.assumesorted.lower() == 'true': # we need to sort input
            sortedfile = '%s.sorted' % os.path.basename(opts.input)
            if opts.datatype == 'sam': # need to work with a bam
                tlog,tempbam,trval = pic.samToBam(opts.input,opts.outdir)
                pic.delme.append(tempbam)
                try:
                    tlog = pic.sortSam(tempbam,sortedfile,opts.outdir)
                except:
                    print '## exception on sorting sam file %s' % opts.input
            else: # is already bam
                try:
                    tlog = pic.sortSam(opts.input,sortedfile,opts.outdir)
                except : # bug - [bam_sort_core] not being ignored - TODO fixme
                    print '## exception %s on sorting bam file %s' % (sys.exc_info()[0],opts.input)
            cl.append('INPUT=%s.bam' % os.path.abspath(os.path.join(opts.outdir,sortedfile)))
            pic.delme.append(os.path.join(opts.outdir,sortedfile))
        else:
            cl.append('INPUT=%s' % os.path.abspath(opts.input))
        stdouts,rval = pic.runPic(opts.jar, cl)


    elif pic.picname == 'CollectGcBiasMetrics':
        assert os.path.isfile(ref_file_name),'PicardGC needs a reference sequence - cannot read %s' % ref_file_name
        # sigh. Why do we do this fakefasta thing? Because we need NO fai to be available or picard barfs unless it has the same length as the input data.
        # why? Dunno
        fakefasta = os.path.join(opts.outdir,'%s_fake.fasta' % os.path.basename(ref_file_name))
        try:
            os.symlink(ref_file_name,fakefasta)
        except:
            s = '## unable to symlink %s to %s - different devices? May need to replace with shutil.copy' % (ref_file_name,fakefasta)
            info = s
            shutil.copy(ref_file_name,fakefasta)
        pic.delme.append(fakefasta)
        x = 'rgPicardGCBiasMetrics'
        pdfname = '%s.pdf' % x
        jpgname = '%s.jpg' % x
        tempout = os.path.join(opts.outdir,'rgPicardGCBiasMetrics.out')
        temppdf = os.path.join(opts.outdir,pdfname)
        cl.append('R=%s' % fakefasta)
        cl.append('WINDOW_SIZE=%s' % opts.windowsize)
        cl.append('MINIMUM_GENOME_FRACTION=%s' % opts.mingenomefrac)
        cl.append('INPUT=%s' % opts.input)
        cl.append('OUTPUT=%s' % tempout)
        cl.append('TMP_DIR=%s' % opts.tmpdir)
        cl.append('CHART_OUTPUT=%s' % temppdf)
        cl.append('SUMMARY_OUTPUT=%s' % pic.metricsOut)
        stdouts,rval = pic.runPic(opts.jar, cl)
        if os.path.isfile(temppdf):
            cl2 = ['convert','-resize x400',temppdf,os.path.join(opts.outdir,jpgname)] # make the jpg for fixPicardOutputs to find
            s,stdouts,rval = pic.runCL(cl=cl2,output_dir=opts.outdir)
        else:
            s = '### runGC: Unable to find pdf %s - please check the log for the causal problem\n' % temppdf
        lf = open(pic.log_filename,'a')
        lf.write(s)
        lf.write('\n')
        lf.close()

    elif pic.picname == 'CollectInsertSizeMetrics':
        """ <command interpreter="python">
        picard_wrapper.py -i "$input_file" -n "$out_prefix" --tmpdir "${__new_file_path__}" --deviations "$deviations"
        --histwidth "$histWidth" --minpct "$minPct" --malevel "$malevel"
        -j "${GALAXY_DATA_INDEX_DIR}/shared/jars/picard/CollectInsertSizeMetrics.jar" -d "$html_file.files_path" -t "$html_file"
        </command>
        """
        isPDF = 'InsertSizeHist.pdf'
        pdfpath = os.path.join(opts.outdir,isPDF)
        histpdf = 'InsertSizeHist.pdf'
        cl.append('I=%s' % opts.input)
        cl.append('O=%s' % pic.metricsOut)
        cl.append('HISTOGRAM_FILE=%s' % histpdf)
        #if opts.taillimit != '0': # this was deprecated although still mentioned in the docs at 1.56
        #    cl.append('TAIL_LIMIT=%s' % opts.taillimit)
        if opts.histwidth != '0':
            cl.append('HISTOGRAM_WIDTH=%s' % opts.histwidth)
        if float( opts.minpct) > 0.0:
            cl.append('MINIMUM_PCT=%s' % opts.minpct)
        if float(opts.deviations) > 0.0:
            cl.append('DEVIATIONS=%s' % opts.deviations)
        if opts.malevel:
            malists = opts.malevel.split(',')
            malist = ['METRIC_ACCUMULATION_LEVEL=%s' % x for x in malists]
            cl += malist
        stdouts,rval = pic.runPic(opts.jar, cl)
        if os.path.exists(pdfpath): # automake thumbnail - will be added to html
            cl2 = ['mogrify', '-format jpg -resize x400 %s' % pdfpath]
            pic.runCL(cl=cl2,output_dir=opts.outdir)
        else:
            s = 'Unable to find expected pdf file %s<br/>\n' % pdfpath
            s += 'This <b>always happens if single ended data was provided</b> to this tool,\n'
            s += 'so please double check that your input data really is paired-end NGS data.<br/>\n'
            s += 'If your input was paired data this may be a bug worth reporting to the galaxy-bugs list\n<br/>'
            logging.info(s)
            if len(stdouts) > 0:
                logging.info(stdouts)

    elif pic.picname == 'MarkDuplicates':
        # assume sorted even if header says otherwise
        cl.append('ASSUME_SORTED=%s' % (opts.assumesorted))
        # input
        cl.append('INPUT=%s' % opts.input)
        # outputs
        cl.append('OUTPUT=%s' % opts.output)
        cl.append('METRICS_FILE=%s' % pic.metricsOut )
        # remove or mark duplicates
        cl.append('REMOVE_DUPLICATES=%s' % opts.remdups)
        # the regular expression to be used to parse reads in incoming SAM file
        cl.append('READ_NAME_REGEX="%s"' % opts.readregex)
        # maximum offset between two duplicate clusters
        cl.append('OPTICAL_DUPLICATE_PIXEL_DISTANCE=%s' % opts.optdupdist)
        stdouts,rval = pic.runPic(opts.jar, cl)

    elif pic.picname == 'FixMateInformation':
        cl.append('I=%s' % opts.input)
        cl.append('O=%s' % tempout)
        cl.append('SORT_ORDER=%s' % opts.sortorder)
        stdouts,rval = pic.runPic(opts.jar,cl)
        haveTempout = True

    elif pic.picname == 'ReorderSam':
        # input
        cl.append('INPUT=%s' % opts.input)
        # output
        cl.append('OUTPUT=%s' % tempout)
        # reference
        cl.append('REFERENCE=%s' % ref_file_name)
        # incomplete dict concordance
        if opts.allow_inc_dict_concord == 'true':
            cl.append('ALLOW_INCOMPLETE_DICT_CONCORDANCE=true')
        # contig length discordance
        if opts.allow_contig_len_discord == 'true':
            cl.append('ALLOW_CONTIG_LENGTH_DISCORDANCE=true')
        stdouts,rval = pic.runPic(opts.jar, cl)
        haveTempout = True

    elif pic.picname == 'ReplaceSamHeader':
        cl.append('INPUT=%s' % opts.input)
        cl.append('OUTPUT=%s' % tempout)
        cl.append('HEADER=%s' % opts.header_file)
        stdouts,rval = pic.runPic(opts.jar, cl)
        haveTempout = True

    elif pic.picname == 'CalculateHsMetrics':
        maxloglines = 100
        baitfname = os.path.join(opts.outdir,'rgPicardHsMetrics.bait')
        targetfname = os.path.join(opts.outdir,'rgPicardHsMetrics.target')
        baitf = pic.makePicInterval(opts.baitbed,baitfname)
        if opts.targetbed == opts.baitbed: # same file sometimes
            targetf = baitf
        else:
            targetf = pic.makePicInterval(opts.targetbed,targetfname)
        cl.append('BAIT_INTERVALS=%s' % baitf)
        cl.append('TARGET_INTERVALS=%s' % targetf)
        cl.append('INPUT=%s' % os.path.abspath(opts.input))
        cl.append('OUTPUT=%s' % pic.metricsOut)
        cl.append('TMP_DIR=%s' % opts.tmpdir)
        stdouts,rval = pic.runPic(opts.jar,cl)

    elif pic.picname == 'ValidateSamFile':
        import pysam
        doTranspose = False
        sortedfile = os.path.join(opts.outdir,'rgValidate.sorted')
        stf = open(pic.log_filename,'w')
        tlog = None
        if opts.datatype == 'sam': # need to work with a bam
            tlog,tempbam,rval = pic.samToBam(opts.input,opts.outdir)
            try:
                tlog = pic.sortSam(tempbam,sortedfile,opts.outdir)
            except:
                print '## exception on sorting sam file %s' % opts.input
        else: # is already bam
            try:
                tlog = pic.sortSam(opts.input,sortedfile,opts.outdir)
            except: # bug - [bam_sort_core] not being ignored - TODO fixme
                print '## exception on sorting bam file %s' % opts.input
        if tlog:
            print '##tlog=',tlog
            stf.write(tlog)
            stf.write('\n')
        sortedfile = '%s.bam' % sortedfile # samtools does that
        cl.append('O=%s' % pic.metricsOut)
        cl.append('TMP_DIR=%s' % opts.tmpdir)
        cl.append('I=%s' % sortedfile)
        opts.maxerrors = '99999999'
        cl.append('MAX_OUTPUT=%s' % opts.maxerrors)
        if opts.ignoreflags[0] != 'None': # picard error values to ignore
            igs = ['IGNORE=%s' % x for x in opts.ignoreflags if x != 'None']
            cl.append(' '.join(igs))
        if opts.bisulphite.lower() != 'false':
            cl.append('IS_BISULFITE_SEQUENCED=true')
        if opts.ref != None or opts.ref_file != None:
            cl.append('R=%s' % ref_file_name)
        stdouts,rval = pic.runPic(opts.jar,cl)
        if opts.datatype == 'sam':
            pic.delme.append(tempbam)
        newsam = opts.output
        outformat = 'bam'
        pe = open(pic.metricsOut,'r').readlines()
        pic.cleanSam(insam=sortedfile, newsam=newsam, picardErrors=pe,outformat=outformat)
        pic.delme.append(sortedfile) # not wanted
        stf.close()
        pic.cleanup()

    #### liubo added CleanSam tool ####
    elif pic.picname == 'CleanSam':
        # input
        cl.append('INPUT=%s' % opts.input)
        # output
        cl.append('OUTPUT=%s' % tempout)
        stdouts,rval = pic.runPic(opts.jar, cl)
        haveTempout = True

    elif pic.picname == 'SortSam':
        cl.append('I=%s' % opts.input)
        cl.append('O=%s' % tempout)
        cl.append('SORT_ORDER=%s' % opts.sortorder)
        stdouts,rval = pic.runPic(opts.jar,cl)
        haveTempout = True

    elif pic.picname == 'BuildBamIndex':
        cl.append('I=%s' % opts.input)
        cl.append('O=%s' % tempout)
        stdouts,rval = pic.runPic(opts.jar,cl)
        haveTempout = True

    elif pic.picname == 'SamFormatConverter':
        cl.append('INPUT=%s' % opts.input)
        cl.append('OUTPUT=%s' % tempout)
        stdouts,rval = pic.runPic(opts.jar, cl) # capture status so failures are reported below
        haveTempout = True

    elif pic.picname == "DownsampleSam":
        cl.append('I=%s' % opts.input)
        mystring = opts.output
        mystringsam = mystring + ".sam"
        cl.append('O=%s' % mystringsam)
        if float(opts.probability) > 0:
            cl.append('PROBABILITY=%s' % opts.probability)
        if float(opts.seed) > 0:
            cl.append('RANDOM_SEED=%s' % opts.seed)
        stdouts,rval = pic.runPic(opts.jar, cl)
        myoutput = mystringsam.replace(".sam", "")
        os.rename(mystringsam,myoutput)

    elif pic.picname == 'MeanQualityByCycle':
        isPDF = 'MeanQualityByCycle.pdf'
        pdfpath = os.path.join(opts.outdir,isPDF)
        histpdf = isPDF
        cl.append('I=%s' % opts.input)
        cl.append('O=%s' % pic.metricsOut)
        cl.append('CHART_OUTPUT=%s' % histpdf)
        cl.append('ASSUME_SORTED=%s' % (opts.assumesorted))
        cl.append('ALIGNED_READS_ONLY=%s' % (opts.alignedreadsonly))
        cl.append('PF_READS_ONLY=%s' % (opts.pfreadsonly))
        stdouts,rval = pic.runPic(opts.jar, cl)
        if os.path.exists(pdfpath): # automake thumbnail - will be added to html
            cl2 = ['mogrify', '-format jpg -resize x400 %s' % pdfpath]
            pic.runCL(cl=cl2,output_dir=opts.outdir)
        else:
            s = 'Unable to find expected pdf file %s<br/>\n' % pdfpath
            logging.info(s)
            if len(stdouts) > 0:
                logging.info(stdouts)

    elif pic.picname == 'CollectRnaSeqMetrics':
        isPDF = pic.picname + '.pdf'
        pdfpath = os.path.join(opts.outdir,isPDF)
        histpdf = isPDF
        cl.append('I=%s' % opts.input)
        cl.append('O=%s' % pic.metricsOut)
        cl.append('CHART_OUTPUT=%s' % histpdf)
        cl.append('ASSUME_SORTED=%s' % (opts.assumesorted))
        cl.append('MINIMUM_LENGTH=%s' % (opts.minlength))
        cl.append('REF_FLAT=%s' % (opts.refflat))
        cl.append('STRAND_SPECIFICITY=%s' % (opts.strandspecificity))
        cl.append('RIBOSOMAL_INTERVALS=%s' % (opts.ribosomalintervals))
        cl.append('RRNA_FRAGMENT_PERCENTAGE=%s' % (opts.rrnafragmentpercentage))
        if opts.ignoreseq: # reference sequence names to skip
            igs = ['IGNORE_SEQUENCE=%s' % x for x in opts.ignoreseq if x != 'None']
            cl.append(' '.join(igs))
        if opts.malevel:
            malists = opts.malevel.split(',')
            malist = ['METRIC_ACCUMULATION_LEVEL=%s' % x for x in malists]
            cl += malist
        stdouts,rval = pic.runPic(opts.jar, cl)
        if os.path.exists(pdfpath): # automake thumbnail - will be added to html
            cl2 = ['mogrify', '-format jpg -resize x400 %s' % pdfpath]
            pic.runCL(cl=cl2,output_dir=opts.outdir)
        else:
            s = 'Unable to find expected pdf file %s<br/>\n' % pdfpath
            logging.info(s)
            if len(stdouts) > 0:
                logging.info(stdouts)

    else:
        print >> sys.stderr, 'picard_wrapper.py got an unknown tool name - %s' % pic.picname
        sys.exit(1)
    if haveTempout:
        # Some Picard tools produced a potentially intermediate bam file.
        # Either just move to final location or create sam
        if os.path.exists(tempout):
            shutil.move(tempout, os.path.abspath(opts.output))
    if opts.htmlout != None or doFix: # return a pretty html page
        pic.fixPicardOutputs(transpose=doTranspose,maxloglines=maxloglines)
    if rval != 0:
        print >> sys.stderr, '## exit code=%d; stdout=%s' % (rval,stdouts)
        # signal failure

if __name__=="__main__": __main__()
