135
|
1 #!/usr/bin/env python
|
|
2 """
|
|
3 Originally written by Kelly Vincent
|
|
4 pretty output and additional picard wrappers by Ross Lazarus for rgenetics
|
|
5 Runs all available wrapped Picard tools.
|
|
6 usage: picard_wrapper.py [options]
|
|
7 code Ross wrote licensed under the LGPL
|
|
8 see http://www.gnu.org/copyleft/lesser.html
|
|
9 """
|
|
10
|
|
11 import optparse, os, sys, subprocess, tempfile, shutil, time, logging
|
|
12
|
|
# Boilerplate XHTML document head/opening for the generated report page;
# the single %s slot receives the wrapper program name (generator meta tag).
galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="Galaxy %s tool output - see http://getgalaxy.org/" />
<title></title>
<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
</head>
<body>
<div class="document">
"""
# Attribution line inserted under the header; %s slots are (tool name, timestamp).
galhtmlattr = """Galaxy tool %s run at %s</b><br/>"""
# Closes the div/body/html elements opened by galhtmlprefix.
galhtmlpostfix = """</div></body></html>\n"""
|
|
27
|
|
28
|
|
def stop_err( msg ):
    """Write msg to stderr and abort the wrapper.

    Exits with status 1 so callers and pipelines see the failure -
    a bare sys.exit() would report success (status 0) after a fatal error.
    """
    sys.stderr.write( '%s\n' % msg )
    sys.exit(1)
|
|
32
|
|
33
|
|
def timenow():
    """Return the current local time as a 'dd/mm/YYYY HH:MM:SS' string."""
    stamp = time.localtime()  # defaults to the current time
    return time.strftime('%d/%m/%Y %H:%M:%S', stamp)
|
|
38
|
|
39
|
|
class PicardBase():
    """
    Simple base class with some utilities for wrapped Picard tools.

    Adapted and merged with Kelly Vincent's code April 2011 (Ross);
    lots of changes. Holds the parsed option namespace, prepares the
    output/temp directories and a per-tool log file, and provides
    helpers to run command lines, convert/sort sam files and render
    Picard metrics output as an HTML report.
    """

    def __init__(self, opts=None, arg0=None):
        """Common stuff needed at init for a picard tool.

        opts -- optparse options namespace (required); outdir, tmpdir,
                jar, picard_cmd, title and input are read here.
        arg0 -- typically sys.argv[0], used to derive the wrapper name.
        """
        assert opts != None, 'PicardBase needs opts at init'
        self.opts = opts
        if self.opts.outdir is None:
            self.opts.outdir = os.getcwd()  # fixmate has no html file eg so use temp dir
        assert self.opts.outdir != None, '## PicardBase needs a temp directory if no output directory passed in'
        self.picname = self.baseName(opts.jar)
        if self.picname.startswith('picard'):
            self.picname = opts.picard_cmd  # special case for some tools like replaceheader?
        self.progname = self.baseName(arg0)
        self.version = '0.002'
        self.delme = []  # list of files to destroy in cleanup()
        self.title = opts.title
        self.inputfile = opts.input
        # best effort - the directories may legitimately already exist
        try:
            os.makedirs(opts.outdir)
        except OSError:
            pass
        try:
            os.makedirs(opts.tmpdir)
        except OSError:
            pass
        self.log_filename = os.path.join(self.opts.outdir, '%s.log' % self.picname)
        self.metricsOut = os.path.join(opts.outdir, '%s.metrics.txt' % self.picname)
        self.setLogging(logfname=self.log_filename)

    def baseName(self, name=None):
        """Return name's file name without directory or extension."""
        return os.path.splitext(os.path.basename(name))[0]

    def setLogging(self, logfname="picard_wrapper.log"):
        """Set up the root logger to append INFO records to logfname."""
        logging.basicConfig(level=logging.INFO,
                            filename=logfname,
                            filemode='a')

    def readLarge(self, fname=None):
        """Read a potentially huge file and return its contents as one string.

        Reads in 1MB chunks and joins once at the end - repeated string
        concatenation is quadratic on very large tool outputs - and always
        closes the handle, even when a read fails part way through.
        """
        chunks = []
        try:
            src = open(fname, 'r')
            try:
                buffsize = 1048576
                while True:
                    more = src.read(buffsize)
                    if not more:
                        break
                    chunks.append(more)
            finally:
                src.close()
        except Exception as e:
            stop_err('Read Large Exception : %s' % str(e))
        return ''.join(chunks)

    def runCL(self, cl=None, output_dir=None):
        """Construct and run a command line; return (log message, stdout text, return code).

        We have galaxy's temp path as opt.temp_dir so don't really need
        isolation. stdout/stderr are spooled to temp files because some
        tools emit potentially vast artifacts; both spool files are read
        back and then removed.
        """
        assert cl != None, 'PicardBase runCL needs a command line as cl'
        if output_dir is None:
            output_dir = self.opts.outdir
        if type(cl) == type([]):
            cl = ' '.join(cl)
        # mkstemp hands back an OPEN descriptor - close it immediately or it
        # leaks, since we reopen the path with open() below
        fd, templog = tempfile.mkstemp(dir=output_dir, suffix='rgtempRun.txt')
        os.close(fd)
        fd, temperr = tempfile.mkstemp(dir=output_dir, suffix='rgtempErr.txt')
        os.close(fd)
        tlf = open(templog, 'wb')
        tef = open(temperr, 'wb')
        process = subprocess.Popen(cl, shell=True, stderr=tef, stdout=tlf, cwd=output_dir)
        rval = process.wait()
        tlf.close()
        tef.close()
        stderrs = self.readLarge(temperr)
        stdouts = self.readLarge(templog)
        if rval > 0:
            s = '## executing %s returned status %d and stderr: \n%s\n' % (cl, rval, stderrs)
            stdouts = '%s\n%s' % (stdouts, stderrs)
        else:
            s = '## executing %s returned status %d and nothing on stderr\n' % (cl, rval)
        logging.info(s)
        os.unlink(templog)  # always
        os.unlink(temperr)  # always
        return s, stdouts, rval  # sometimes s is an output

    def runPic(self, jar, cl):
        """Run a picard jar; cl is everything after the jar file name in the command."""
        runme = ['java -Xmx%s' % self.opts.maxjheap]
        runme.append(" -Djava.io.tmpdir='%s' " % self.opts.tmpdir)
        runme.append('-jar %s' % jar)
        runme += cl
        s, stdouts, rval = self.runCL(cl=runme, output_dir=self.opts.outdir)
        return stdouts, rval

    def samToBam(self, infile=None, outdir=None):
        """Use samtools view to convert sam to bam; return (log, temp bam path, return code)."""
        fd, tempbam = tempfile.mkstemp(dir=outdir, suffix='rgutilsTemp.bam')
        os.close(fd)  # samtools writes to the path; keep the descriptor from leaking
        cl = ['samtools view -h -b -S -o ', tempbam, infile]
        tlog, stdouts, rval = self.runCL(cl, outdir)
        return tlog, tempbam, rval

    def sortSam(self, infile=None, outfile=None, outdir=None):
        """Sort a bam with samtools sort (samtools appends .bam); return the log string."""
        print('## sortSam got infile=%s,outfile=%s,outdir=%s' % (infile, outfile, outdir))
        cl = ['samtools sort', infile, outfile]
        tlog, stdouts, rval = self.runCL(cl, outdir)
        return tlog

    def cleanup(self):
        """Best-effort removal of every temporary file registered in self.delme."""
        for fname in self.delme:
            try:
                os.unlink(fname)
            except OSError:
                pass

    def prettyPicout(self, transpose, maxrows):
        """Organize picard outputs into report html page fragments.

        transpose -- present the metrics table transposed for readability.
        maxrows -- accepted for interface compatibility; not used here.
        Returns a list of html fragments (empty if no metrics were written).
        """
        res = []
        try:
            r = open(self.metricsOut, 'r').readlines()
        except IOError:
            r = []  # no metrics file - produce an empty report section
        if len(r) > 0:
            res.append('<b>Picard on line resources</b><ul>\n')
            res.append('<li><a href="http://picard.sourceforge.net/index.shtml">Click here for Picard Documentation</a></li>\n')
            res.append('<li><a href="http://picard.sourceforge.net/picard-metric-definitions.shtml">Click here for Picard Metrics definitions</a></li></ul><hr/>\n')
            if transpose:
                res.append('<b>Picard output (transposed to make it easier to see)</b><hr/>\n')
            else:
                res.append('<b>Picard output</b><hr/>\n')
            res.append('<table cellpadding="3" >\n')
            dat = []
            heads = []
            # special case for estimate library complexity hist
            thist = False
            for i, row in enumerate(r):
                if row.strip():
                    srow = row.split('\t')
                    if row.startswith('#'):
                        heads.append(row.strip())  # want strings
                    else:
                        dat.append(srow)  # want lists
                    if row.startswith('## HISTOGRAM'):
                        thist = True
            if len(heads) > 0:
                hres = ['<tr class="d%d"><td colspan="2">%s</td></tr>' % (j % 2, x) for j, x in enumerate(heads)]
                res += hres
                heads = []
            if len(dat) > 0:
                if transpose and not thist:
                    # transpose an arbitrary list of lists, padding short rows
                    # with None (the py2-only map(None, *dat) idiom is gone in py3)
                    width = max([len(x) for x in dat])
                    tdat = [tuple(x[k] if k < len(x) else None for x in dat) for k in range(width)]
                    tdat = ['<tr class="d%d"><td>%s</td><td>%s </td></tr>\n' % ((j + len(heads)) % 2, x[0], x[1]) for j, x in enumerate(tdat)]
                else:
                    tdat = ['\t'.join(x).strip() for x in dat]  # back to strings :(
                    tdat = ['<tr class="d%d"><td colspan="2">%s</td></tr>\n' % ((j + len(heads)) % 2, x) for j, x in enumerate(tdat)]
                res += tdat
                dat = []
            res.append('</table>\n')
        return res

    def fixPicardOutputs(self, transpose, maxloglines):
        """Write the final html report for a picard run to opts.htmlout.

        Picard produces long hard to read tab header files - make them
        available but present them transposed for readability. Also links
        any pdf chart outputs (with jpg thumbnails) and appends a possibly
        truncated copy of the run log.
        """
        logging.shutdown()
        self.cleanup()  # remove temp files stored in delme
        rstyle = """<style type="text/css">
tr.d0 td {background-color: oldlace; color: black;}
tr.d1 td {background-color: aliceblue; color: black;}
</style>"""
        res = [rstyle, ]
        res.append(galhtmlprefix % self.progname)
        res.append(galhtmlattr % (self.picname, timenow()))
        flist = [x for x in os.listdir(self.opts.outdir) if not x.startswith('.')]
        pdflist = [x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf']
        if len(pdflist) > 0:  # assumes all pdfs come with thumbnail .jpgs
            for p in pdflist:
                pbase = os.path.splitext(p)[0]  # removes .pdf
                imghref = '%s.jpg' % pbase
                mimghref = '%s-0.jpg' % pbase  # multiple pages pdf -> multiple thumbnails without asking!
                if mimghref in flist:
                    imghref = mimghref  # only one for thumbnail...it's a multi page pdf
                res.append('<table cellpadding="10"><tr><td>\n')
                res.append('<a href="%s"><img src="%s" title="Click image preview for a print quality PDF version" hspace="10" align="middle"></a>\n' % (p, imghref))
                res.append('</tr></td></table>\n')
        if len(flist) > 0:
            res.append('<b>The following output files were created (click the filename to view/download a copy):</b><hr/>')
            res.append('<table>\n')
            for i, f in enumerate(flist):
                fn = os.path.split(f)[-1]
                res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn, fn))
            res.append('</table><p/>\n')
        pres = self.prettyPicout(transpose, maxloglines)
        if len(pres) > 0:
            res += pres
        lf = open(self.log_filename, 'r')
        l = lf.readlines()
        lf.close()
        llen = len(l)
        if llen > 0:
            res.append('<b>Picard Tool Run Log</b><hr/>\n')
            rlog = ['<pre>', ]
            if llen > maxloglines:
                # show the first and last n lines with a deletion marker between
                n = min(50, int(maxloglines / 2))
                rlog += l[:n]
                rlog.append('------------ ## %d rows deleted ## --------------\n' % (llen - maxloglines))
                rlog += l[-n:]
            else:
                rlog += l
            rlog.append('</pre>')
            if llen > maxloglines:
                rlog.append('\n<b>## WARNING - %d log lines truncated - <a href="%s">%s</a> contains entire output</b>' % (llen - maxloglines, self.log_filename, self.log_filename))
            res += rlog
        else:
            res.append("### Odd, Picard left no log file %s - must have really barfed badly?\n" % self.log_filename)
        res.append('<hr/>The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> \n')
        res.append('generated all outputs reported here running as a <a href="http://getgalaxy.org">Galaxy</a> tool')
        res.append(galhtmlpostfix)
        outf = open(self.opts.htmlout, 'w')
        outf.write(''.join(res))
        outf.write('\n')
        outf.close()

    def makePicInterval(self, inbed=None, outf=None):
        """Prepend the input sam/bam header to a 5-column bed, writing a picard interval file.

        Picard wants bait and target files to have the same header length
        as the incoming bam/sam - a meaningful (ie accurate)
        representation will fail because of this, so this hack; it would
        be far better to be able to supply the original bed untouched.
        Additional checking added Ross Lazarus Dec 2011 to deal with two
        'bug' reports on the list.
        """
        assert inbed != None
        bed = open(inbed, 'r').readlines()
        sbed = [x.split('\t') for x in bed]  # lengths MUST be 5
        lens = [len(x) for x in sbed]
        # guard the index so a short row triggers the friendly field-count
        # error below instead of an IndexError here
        strands = [x[3] for x in sbed if len(x) > 3 and not x[3] in ['+', '-']]
        maxl = max(lens)
        minl = min(lens)
        e = []
        if maxl != minl:
            e.append("## Input error: Inconsistent field count in %s - please read the documentation on bait/target format requirements, fix and try again" % inbed)
        if maxl != 5:
            e.append("## Input error: %d fields found in %s, 5 required - please read the warning and documentation on bait/target format requirements, fix and try again" % (maxl, inbed))
        if len(strands) > 0:
            e.append("## Input error: Fourth column in %s is not the required strand (+ or -) - please read the warning and documentation on bait/target format requirements, fix and try again" % (inbed))
        if len(e) > 0:  # write to stderr and quit
            sys.stderr.write('\n'.join(e))
            sys.stderr.write('\n')
            sys.exit(1)
        thead = os.path.join(self.opts.outdir, 'tempSamHead.txt')
        if self.opts.datatype == 'sam':
            cl = ['samtools view -H -S', self.opts.input, '>', thead]
        else:
            cl = ['samtools view -H', self.opts.input, '>', thead]
        self.runCL(cl=cl, output_dir=self.opts.outdir)
        head = open(thead, 'r').readlines()
        s = '## got %d rows of header\n' % (len(head))
        logging.info(s)
        o = open(outf, 'w')
        o.write(''.join(head))
        o.write(''.join(bed))
        o.close()
        return outf

    def cleanSam(self, insam=None, newsam=None, picardErrors=None, outformat=None):
        """Remove the reads named in picardErrors from insam, writing newsam.

        Interesting problem - if paired, must remove mate pair of errors
        too or we have a new set of errors after cleaning - missing mate
        pairs! pysam is cool:
            infile = pysam.Samfile( "-", "r" )
            outfile = pysam.Samfile( "-", "w", template = infile )
            for s in infile: outfile.write(s)
        Error lines from ValidateSamFile.jar look like:
        WARNING: Record 32, Read name SRR006041.1202260, NM tag (nucleotide differences) is missing
        ERROR: Record 33, Read name SRR006041.1042721, Empty sequence dictionary.
        ERROR: Record 33, Read name SRR006041.1042721, RG ID on SAMRecord not found in header: SRR006041
        If there is nothing to remove, insam is simply copied to newsam.
        """
        # deferred import: only the ValidateSamFile code path needs pysam,
        # and the module-level imports do not include it
        import pysam
        assert os.path.isfile(insam), 'rgPicardValidate cleansam needs an input sam file - cannot find %s' % insam
        assert newsam != None, 'rgPicardValidate cleansam needs an output new sam file path'
        if picardErrors is None:  # avoid a shared mutable default argument
            picardErrors = []
        removeNames = [x.split(',')[1].replace(' Read name ', '') for x in picardErrors if len(x.split(',')) > 2]
        remDict = dict(zip(removeNames, range(len(removeNames))))
        infile = pysam.Samfile(insam, 'rb')
        info = 'found %d error sequences in picardErrors, %d unique' % (len(removeNames), len(remDict))
        if len(removeNames) > 0:
            outfile = pysam.Samfile(newsam, 'wb', template=infile)  # template must be an open file
            i = 0
            j = 0
            for row in infile:
                # the first error name maps to index 0, which is falsy - test
                # for None explicitly or that read would never be dropped
                dropme = remDict.get(row.qname, None)  # keep if None
                if dropme is None:
                    outfile.write(row)
                    j += 1
                else:  # discard
                    i += 1
            info = '%s\n%s' % (info, 'Discarded %d lines writing %d to %s from %s' % (i, j, newsam, insam))
            outfile.close()
            infile.close()
        else:  # we really want a nullop or a simple pointer copy
            infile.close()
            if newsam:
                shutil.copy(insam, newsam)
        logging.info(info)
|
|
361
|
|
362
|
|
363
|
|
364 def __main__():
|
|
365 doFix = False # tools returning htmlfile don't need this
|
|
366 doTranspose = True # default
|
|
367 maxloglines = 100 # default
|
|
368 #Parse Command Line
|
|
369 op = optparse.OptionParser()
|
|
370 # All tools
|
|
371 op.add_option('-i', '--input', dest='input', help='Input SAM or BAM file' )
|
|
372 op.add_option('-e', '--inputext', default=None)
|
|
373 op.add_option('-o', '--output', default=None)
|
|
374 op.add_option('-n', '--title', default="Pick a Picard Tool")
|
|
375 op.add_option('-t', '--htmlout', default=None)
|
|
376 op.add_option('-d', '--outdir', default=None)
|
|
377 op.add_option('-x', '--maxjheap', default='3000m')
|
|
378 op.add_option('-b', '--bisulphite', default='false')
|
|
379 op.add_option('-s', '--sortorder', default='query')
|
|
380 op.add_option('','--tmpdir', default='/tmp')
|
|
381 op.add_option('-j','--jar',default='')
|
|
382 op.add_option('','--picard-cmd',default=None)
|
|
383 # Many tools
|
|
384 op.add_option( '', '--output-format', dest='output_format', help='Output format' )
|
|
385 op.add_option( '', '--bai-file', dest='bai_file', help='The path to the index file for the input bam file' )
|
|
386 op.add_option( '', '--ref', dest='ref', help='Built-in reference with fasta and dict file', default=None )
|
|
387 # CreateSequenceDictionary
|
|
388 op.add_option( '', '--ref-file', dest='ref_file', help='Fasta to use as reference', default=None )
|
|
389 op.add_option( '', '--species-name', dest='species_name', help='Species name to use in creating dict file from fasta file' )
|
|
390 op.add_option( '', '--build-name', dest='build_name', help='Name of genome assembly to use in creating dict file from fasta file' )
|
|
391 op.add_option( '', '--trunc-names', dest='trunc_names', help='Truncate sequence names at first whitespace from fasta file' )
|
|
392 # MarkDuplicates
|
|
393 op.add_option( '', '--remdups', default='true', help='Remove duplicates from output file' )
|
|
394 op.add_option( '', '--optdupdist', default="100", help='Maximum pixels between two identical sequences in order to consider them optical duplicates.' )
|
|
395 # CollectInsertSizeMetrics
|
|
396 op.add_option('', '--taillimit', default="0")
|
|
397 op.add_option('', '--histwidth', default="0")
|
|
398 op.add_option('', '--minpct', default="0.01")
|
|
399 op.add_option('', '--malevel', default='')
|
|
400 op.add_option('', '--deviations', default="0.0")
|
|
401 # CollectAlignmentSummaryMetrics
|
|
402 op.add_option('', '--maxinsert', default="20")
|
|
403 op.add_option('', '--adaptors', default='')
|
|
404 # FixMateInformation and validate
|
|
405 # CollectGcBiasMetrics
|
|
406 op.add_option('', '--windowsize', default='100')
|
|
407 op.add_option('', '--mingenomefrac', default='0.00001')
|
|
408 # AddOrReplaceReadGroups
|
|
409 op.add_option( '', '--rg-opts', dest='rg_opts', help='Specify extra (optional) arguments with full, otherwise preSet' )
|
|
410 op.add_option( '', '--rg-lb', dest='rg_library', help='Read Group Library' )
|
|
411 op.add_option( '', '--rg-pl', dest='rg_platform', help='Read Group platform (e.g. illumina, solid)' )
|
|
412 op.add_option( '', '--rg-pu', dest='rg_plat_unit', help='Read Group platform unit (eg. run barcode) ' )
|
|
413 op.add_option( '', '--rg-sm', dest='rg_sample', help='Read Group sample name' )
|
|
414 op.add_option( '', '--rg-id', dest='rg_id', help='Read Group ID' )
|
|
415 op.add_option( '', '--rg-cn', dest='rg_seq_center', help='Read Group sequencing center name' )
|
|
416 op.add_option( '', '--rg-ds', dest='rg_desc', help='Read Group description' )
|
|
417 # ReorderSam
|
|
418 op.add_option( '', '--allow-inc-dict-concord', dest='allow_inc_dict_concord', help='Allow incomplete dict concordance' )
|
|
419 op.add_option( '', '--allow-contig-len-discord', dest='allow_contig_len_discord', help='Allow contig length discordance' )
|
|
420 # ReplaceSamHeader
|
|
421 op.add_option( '', '--header-file', dest='header_file', help='sam or bam file from which header will be read' )
|
|
422
|
|
423 op.add_option('','--assumesorted', default='true')
|
|
424 op.add_option('','--readregex', default="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*")
|
|
425 #estimatelibrarycomplexity
|
|
426 op.add_option('','--minid', default="5")
|
|
427 op.add_option('','--maxdiff', default="0.03")
|
|
428 op.add_option('','--minmeanq', default="20")
|
|
429 #hsmetrics
|
|
430 op.add_option('','--baitbed', default=None)
|
|
431 op.add_option('','--targetbed', default=None)
|
|
432 #validate
|
|
433 op.add_option('','--ignoreflags', action='append', type="string")
|
|
434 op.add_option('','--maxerrors', default=None)
|
|
435 op.add_option('','--datatype', default=None)
|
|
436 op.add_option('','--bamout', default=None)
|
|
437 op.add_option('','--samout', default=None)
|
|
438 #downsample
|
|
439 op.add_option('','--probability', default="1")
|
|
440 op.add_option('','--seed', default="1")
|
|
441 #meanqualitybycycle
|
|
442 op.add_option('','--pfreadsonly', default='false')
|
|
443 op.add_option('','--alignedreadsonly', default='false')
|
|
444 #collectrnaseqmetrics
|
|
445 op.add_option('','--ignoreseq', action='append', type="string")
|
|
446 op.add_option('','--minlength', default=None)
|
|
447 op.add_option('','--refflat', default=None)
|
|
448 op.add_option('','--strandspecificity', default=None)
|
|
449 op.add_option('','--ribosomalintervals', default='null')
|
|
450 op.add_option('','--rrnafragmentpercentage', default=None)
|
|
451
|
|
452 opts, args = op.parse_args()
|
|
453 opts.sortme = opts.assumesorted == 'false'
|
|
454 assert opts.input <> None
|
|
455 # need to add
|
|
456 # instance that does all the work
|
|
457 pic = PicardBase(opts,sys.argv[0])
|
|
458
|
|
459 tmp_dir = opts.outdir
|
|
460 haveTempout = False # we use this where sam output is an option
|
|
461 rval = 0
|
|
462 stdouts = 'Not run yet'
|
|
463 # set ref and dict files to use (create if necessary)
|
|
464 ref_file_name = opts.ref
|
|
465 if opts.ref_file <> None:
|
|
466 csd = 'CreateSequenceDictionary'
|
|
467 realjarpath = os.path.split(opts.jar)[0]
|
|
468 jarpath = os.path.join(realjarpath,'%s.jar' % csd) # for refseq
|
|
469 tmp_ref_fd, tmp_ref_name = tempfile.mkstemp( dir=opts.tmpdir , prefix = pic.picname)
|
|
470 ref_file_name = '%s.fasta' % tmp_ref_name
|
|
471 # build dict
|
|
472 dict_file_name = '%s.dict' % tmp_ref_name
|
|
473 os.symlink( opts.ref_file, ref_file_name )
|
|
474 cl = ['REFERENCE=%s' % ref_file_name]
|
|
475 cl.append('OUTPUT=%s' % dict_file_name)
|
|
476 cl.append('URI=%s' % os.path.basename( opts.ref_file ))
|
|
477 cl.append('TRUNCATE_NAMES_AT_WHITESPACE=%s' % opts.trunc_names)
|
|
478 if opts.species_name:
|
|
479 cl.append('SPECIES=%s' % opts.species_name)
|
|
480 if opts.build_name:
|
|
481 cl.append('GENOME_ASSEMBLY=%s' % opts.build_name)
|
|
482 pic.delme.append(dict_file_name)
|
|
483 pic.delme.append(ref_file_name)
|
|
484 pic.delme.append(tmp_ref_name)
|
|
485 stdouts,rval = pic.runPic(jarpath, cl)
|
|
486 # run relevant command(s)
|
|
487
|
|
488 # define temporary output
|
|
489 # if output is sam, it must have that extension, otherwise bam will be produced
|
|
490 # specify sam or bam file with extension
|
|
491 if opts.output_format == 'sam':
|
|
492 suff = '.sam'
|
|
493 else:
|
|
494 suff = ''
|
|
495 tmp_fd, tempout = tempfile.mkstemp( dir=opts.tmpdir, suffix=suff )
|
|
496
|
|
497 cl = ['VALIDATION_STRINGENCY=LENIENT',]
|
|
498
|
|
499 if pic.picname == 'AddOrReplaceReadGroups':
|
|
500 # sort order to match Galaxy's default
|
|
501 cl.append('SORT_ORDER=coordinate')
|
|
502 # input
|
|
503 cl.append('INPUT=%s' % opts.input)
|
|
504 # outputs
|
|
505 cl.append('OUTPUT=%s' % tempout)
|
|
506 # required read groups
|
|
507 cl.append('RGLB="%s"' % opts.rg_library)
|
|
508 cl.append('RGPL="%s"' % opts.rg_platform)
|
|
509 cl.append('RGPU="%s"' % opts.rg_plat_unit)
|
|
510 cl.append('RGSM="%s"' % opts.rg_sample)
|
|
511 if opts.rg_id:
|
|
512 cl.append('RGID="%s"' % opts.rg_id)
|
|
513 # optional read groups
|
|
514 if opts.rg_seq_center:
|
|
515 cl.append('RGCN="%s"' % opts.rg_seq_center)
|
|
516 if opts.rg_desc:
|
|
517 cl.append('RGDS="%s"' % opts.rg_desc)
|
|
518 stdouts,rval = pic.runPic(opts.jar, cl)
|
|
519 haveTempout = True
|
|
520
|
|
521 elif pic.picname == 'BamIndexStats':
|
|
522 tmp_fd, tmp_name = tempfile.mkstemp( dir=tmp_dir )
|
|
523 tmp_bam_name = '%s.bam' % tmp_name
|
|
524 tmp_bai_name = '%s.bai' % tmp_bam_name
|
|
525 os.symlink( opts.input, tmp_bam_name )
|
|
526 os.symlink( opts.bai_file, tmp_bai_name )
|
|
527 cl.append('INPUT=%s' % ( tmp_bam_name ))
|
|
528 pic.delme.append(tmp_bam_name)
|
|
529 pic.delme.append(tmp_bai_name)
|
|
530 pic.delme.append(tmp_name)
|
|
531 stdouts,rval = pic.runPic( opts.jar, cl )
|
|
532 f = open(pic.metricsOut,'a')
|
|
533 f.write(stdouts) # got this on stdout from runCl
|
|
534 f.write('\n')
|
|
535 f.close()
|
|
536 doTranspose = False # but not transposed
|
|
537
|
|
538 elif pic.picname == 'EstimateLibraryComplexity':
|
|
539 cl.append('I=%s' % opts.input)
|
|
540 cl.append('O=%s' % pic.metricsOut)
|
|
541 if float(opts.minid) > 0:
|
|
542 cl.append('MIN_IDENTICAL_BASES=%s' % opts.minid)
|
|
543 if float(opts.maxdiff) > 0.0:
|
|
544 cl.append('MAX_DIFF_RATE=%s' % opts.maxdiff)
|
|
545 if float(opts.minmeanq) > 0:
|
|
546 cl.append('MIN_MEAN_QUALITY=%s' % opts.minmeanq)
|
|
547 if opts.readregex > '':
|
|
548 cl.append('READ_NAME_REGEX="%s"' % opts.readregex)
|
|
549 if float(opts.optdupdist) > 0:
|
|
550 cl.append('OPTICAL_DUPLICATE_PIXEL_DISTANCE=%s' % opts.optdupdist)
|
|
551 stdouts,rval = pic.runPic(opts.jar, cl)
|
|
552
|
|
553 elif pic.picname == 'CollectAlignmentSummaryMetrics':
|
|
554 # Why do we do this fakefasta thing?
|
|
555 # Because we need NO fai to be available or picard barfs unless it matches the input data.
|
|
556 # why? Dunno Seems to work without complaining if the .bai file is AWOL....
|
|
557 fakefasta = os.path.join(opts.outdir,'%s_fake.fasta' % os.path.basename(ref_file_name))
|
|
558 try:
|
|
559 os.symlink(ref_file_name,fakefasta)
|
|
560 except:
|
|
561 s = '## unable to symlink %s to %s - different devices? Will shutil.copy'
|
|
562 info = s
|
|
563 shutil.copy(ref_file_name,fakefasta)
|
|
564 pic.delme.append(fakefasta)
|
|
565 cl.append('ASSUME_SORTED=true')
|
|
566 adaptlist = opts.adaptors.split(',')
|
|
567 adaptorseqs = ['ADAPTER_SEQUENCE=%s' % x for x in adaptlist]
|
|
568 cl += adaptorseqs
|
|
569 cl.append('IS_BISULFITE_SEQUENCED=%s' % opts.bisulphite)
|
|
570 cl.append('MAX_INSERT_SIZE=%s' % opts.maxinsert)
|
|
571 cl.append('OUTPUT=%s' % pic.metricsOut)
|
|
572 cl.append('R=%s' % fakefasta)
|
|
573 cl.append('TMP_DIR=%s' % opts.tmpdir)
|
|
574 if not opts.assumesorted.lower() == 'true': # we need to sort input
|
|
575 sortedfile = '%s.sorted' % os.path.basename(opts.input)
|
|
576 if opts.datatype == 'sam': # need to work with a bam
|
|
577 tlog,tempbam,trval = pic.samToBam(opts.input,opts.outdir)
|
|
578 pic.delme.append(tempbam)
|
|
579 try:
|
|
580 tlog = pic.sortSam(tempbam,sortedfile,opts.outdir)
|
|
581 except:
|
|
582 print '## exception on sorting sam file %s' % opts.input
|
|
583 else: # is already bam
|
|
584 try:
|
|
585 tlog = pic.sortSam(opts.input,sortedfile,opts.outdir)
|
|
586 except : # bug - [bam_sort_core] not being ignored - TODO fixme
|
|
587 print '## exception %s on sorting bam file %s' % (sys.exc_info()[0],opts.input)
|
|
588 cl.append('INPUT=%s.bam' % os.path.abspath(os.path.join(opts.outdir,sortedfile)))
|
|
589 pic.delme.append(os.path.join(opts.outdir,sortedfile))
|
|
590 else:
|
|
591 cl.append('INPUT=%s' % os.path.abspath(opts.input))
|
|
592 stdouts,rval = pic.runPic(opts.jar, cl)
|
|
593
|
|
594
|
|
595 elif pic.picname == 'CollectGcBiasMetrics':
|
|
596 assert os.path.isfile(ref_file_name),'PicardGC needs a reference sequence - cannot read %s' % ref_file_name
|
|
597 # sigh. Why do we do this fakefasta thing? Because we need NO fai to be available or picard barfs unless it has the same length as the input data.
|
|
598 # why? Dunno
|
|
599 fakefasta = os.path.join(opts.outdir,'%s_fake.fasta' % os.path.basename(ref_file_name))
|
|
600 try:
|
|
601 os.symlink(ref_file_name,fakefasta)
|
|
602 except:
|
|
603 s = '## unable to symlink %s to %s - different devices? May need to replace with shutil.copy'
|
|
604 info = s
|
|
605 shutil.copy(ref_file_name,fakefasta)
|
|
606 pic.delme.append(fakefasta)
|
|
607 x = 'rgPicardGCBiasMetrics'
|
|
608 pdfname = '%s.pdf' % x
|
|
609 jpgname = '%s.jpg' % x
|
|
610 tempout = os.path.join(opts.outdir,'rgPicardGCBiasMetrics.out')
|
|
611 temppdf = os.path.join(opts.outdir,pdfname)
|
|
612 cl.append('R=%s' % fakefasta)
|
|
613 cl.append('WINDOW_SIZE=%s' % opts.windowsize)
|
|
614 cl.append('MINIMUM_GENOME_FRACTION=%s' % opts.mingenomefrac)
|
|
615 cl.append('INPUT=%s' % opts.input)
|
|
616 cl.append('OUTPUT=%s' % tempout)
|
|
617 cl.append('TMP_DIR=%s' % opts.tmpdir)
|
|
618 cl.append('CHART_OUTPUT=%s' % temppdf)
|
|
619 cl.append('SUMMARY_OUTPUT=%s' % pic.metricsOut)
|
|
620 stdouts,rval = pic.runPic(opts.jar, cl)
|
|
621 if os.path.isfile(temppdf):
|
|
622 cl2 = ['convert','-resize x400',temppdf,os.path.join(opts.outdir,jpgname)] # make the jpg for fixPicardOutputs to find
|
|
623 s,stdouts,rval = pic.runCL(cl=cl2,output_dir=opts.outdir)
|
|
624 else:
|
|
625 s='### runGC: Unable to find pdf %s - please check the log for the causal problem\n' % temppdf
|
|
626 lf = open(pic.log_filename,'a')
|
|
627 lf.write(s)
|
|
628 lf.write('\n')
|
|
629 lf.close()
|
|
630
|
|
631 elif pic.picname == 'CollectInsertSizeMetrics':
|
|
632 """ <command interpreter="python">
|
|
633 picard_wrapper.py -i "$input_file" -n "$out_prefix" --tmpdir "${__new_file_path__}" --deviations "$deviations"
|
|
634 --histwidth "$histWidth" --minpct "$minPct" --malevel "$malevel"
|
|
635 -j "${GALAXY_DATA_INDEX_DIR}/shared/jars/picard/CollectInsertSizeMetrics.jar" -d "$html_file.files_path" -t "$html_file"
|
|
636 </command>
|
|
637 """
|
|
638 isPDF = 'InsertSizeHist.pdf'
|
|
639 pdfpath = os.path.join(opts.outdir,isPDF)
|
|
640 histpdf = 'InsertSizeHist.pdf'
|
|
641 cl.append('I=%s' % opts.input)
|
|
642 cl.append('O=%s' % pic.metricsOut)
|
|
643 cl.append('HISTOGRAM_FILE=%s' % histpdf)
|
|
644 #if opts.taillimit <> '0': # this was deprecated although still mentioned in the docs at 1.56
|
|
645 # cl.append('TAIL_LIMIT=%s' % opts.taillimit)
|
|
646 if opts.histwidth <> '0':
|
|
647 cl.append('HISTOGRAM_WIDTH=%s' % opts.histwidth)
|
|
648 if float( opts.minpct) > 0.0:
|
|
649 cl.append('MINIMUM_PCT=%s' % opts.minpct)
|
|
650 if float(opts.deviations) > 0.0:
|
|
651 cl.append('DEVIATIONS=%s' % opts.deviations)
|
|
652 if opts.malevel:
|
|
653 malists = opts.malevel.split(',')
|
|
654 malist = ['METRIC_ACCUMULATION_LEVEL=%s' % x for x in malists]
|
|
655 cl += malist
|
|
656 stdouts,rval = pic.runPic(opts.jar, cl)
|
|
657 if os.path.exists(pdfpath): # automake thumbnail - will be added to html
|
|
658 cl2 = ['mogrify', '-format jpg -resize x400 %s' % pdfpath]
|
|
659 pic.runCL(cl=cl2,output_dir=opts.outdir)
|
|
660 else:
|
|
661 s = 'Unable to find expected pdf file %s<br/>\n' % pdfpath
|
|
662 s += 'This <b>always happens if single ended data was provided</b> to this tool,\n'
|
|
663 s += 'so please double check that your input data really is paired-end NGS data.<br/>\n'
|
|
664 s += 'If your input was paired data this may be a bug worth reporting to the galaxy-bugs list\n<br/>'
|
|
665 logging.info(s)
|
|
666 if len(stdouts) > 0:
|
|
667 logging.info(stdouts)
|
|
668
|
|
669 elif pic.picname == 'MarkDuplicates':
|
|
670 # assume sorted even if header says otherwise
|
|
671 cl.append('ASSUME_SORTED=%s' % (opts.assumesorted))
|
|
672 # input
|
|
673 cl.append('INPUT=%s' % opts.input)
|
|
674 # outputs
|
|
675 cl.append('OUTPUT=%s' % opts.output)
|
|
676 cl.append('METRICS_FILE=%s' % pic.metricsOut )
|
|
677 # remove or mark duplicates
|
|
678 cl.append('REMOVE_DUPLICATES=%s' % opts.remdups)
|
|
679 # the regular expression to be used to parse reads in incoming SAM file
|
|
680 cl.append('READ_NAME_REGEX="%s"' % opts.readregex)
|
|
681 # maximum offset between two duplicate clusters
|
|
682 cl.append('OPTICAL_DUPLICATE_PIXEL_DISTANCE=%s' % opts.optdupdist)
|
|
683 stdouts,rval = pic.runPic(opts.jar, cl)
|
|
684
|
|
685 elif pic.picname == 'FixMateInformation':
|
|
686 cl.append('I=%s' % opts.input)
|
|
687 cl.append('O=%s' % tempout)
|
|
688 cl.append('SORT_ORDER=%s' % opts.sortorder)
|
|
689 stdouts,rval = pic.runPic(opts.jar,cl)
|
|
690 haveTempout = True
|
|
691
|
|
692 elif pic.picname == 'ReorderSam':
|
|
693 # input
|
|
694 cl.append('INPUT=%s' % opts.input)
|
|
695 # output
|
|
696 cl.append('OUTPUT=%s' % tempout)
|
|
697 # reference
|
|
698 cl.append('REFERENCE=%s' % ref_file_name)
|
|
699 # incomplete dict concordance
|
|
700 if opts.allow_inc_dict_concord == 'true':
|
|
701 cl.append('ALLOW_INCOMPLETE_DICT_CONCORDANCE=true')
|
|
702 # contig length discordance
|
|
703 if opts.allow_contig_len_discord == 'true':
|
|
704 cl.append('ALLOW_CONTIG_LENGTH_DISCORDANCE=true')
|
|
705 stdouts,rval = pic.runPic(opts.jar, cl)
|
|
706 haveTempout = True
|
|
707
|
|
708 elif pic.picname == 'ReplaceSamHeader':
|
|
709 cl.append('INPUT=%s' % opts.input)
|
|
710 cl.append('OUTPUT=%s' % tempout)
|
|
711 cl.append('HEADER=%s' % opts.header_file)
|
|
712 stdouts,rval = pic.runPic(opts.jar, cl)
|
|
713 haveTempout = True
|
|
714
|
|
715 elif pic.picname == 'CalculateHsMetrics':
|
|
716 maxloglines = 100
|
|
717 baitfname = os.path.join(opts.outdir,'rgPicardHsMetrics.bait')
|
|
718 targetfname = os.path.join(opts.outdir,'rgPicardHsMetrics.target')
|
|
719 baitf = pic.makePicInterval(opts.baitbed,baitfname)
|
|
720 if opts.targetbed == opts.baitbed: # same file sometimes
|
|
721 targetf = baitf
|
|
722 else:
|
|
723 targetf = pic.makePicInterval(opts.targetbed,targetfname)
|
|
724 cl.append('BAIT_INTERVALS=%s' % baitf)
|
|
725 cl.append('TARGET_INTERVALS=%s' % targetf)
|
|
726 cl.append('INPUT=%s' % os.path.abspath(opts.input))
|
|
727 cl.append('OUTPUT=%s' % pic.metricsOut)
|
|
728 cl.append('TMP_DIR=%s' % opts.tmpdir)
|
|
729 stdouts,rval = pic.runPic(opts.jar,cl)
|
|
730
|
|
731 elif pic.picname == 'ValidateSamFile':
|
|
732 import pysam
|
|
733 doTranspose = False
|
|
734 sortedfile = os.path.join(opts.outdir,'rgValidate.sorted')
|
|
735 stf = open(pic.log_filename,'w')
|
|
736 tlog = None
|
|
737 if opts.datatype == 'sam': # need to work with a bam
|
|
738 tlog,tempbam,rval = pic.samToBam(opts.input,opts.outdir)
|
|
739 try:
|
|
740 tlog = pic.sortSam(tempbam,sortedfile,opts.outdir)
|
|
741 except:
|
|
742 print '## exception on sorting sam file %s' % opts.input
|
|
743 else: # is already bam
|
|
744 try:
|
|
745 tlog = pic.sortSam(opts.input,sortedfile,opts.outdir)
|
|
746 except: # bug - [bam_sort_core] not being ignored - TODO fixme
|
|
747 print '## exception on sorting bam file %s' % opts.input
|
|
748 if tlog:
|
|
749 print '##tlog=',tlog
|
|
750 stf.write(tlog)
|
|
751 stf.write('\n')
|
|
752 sortedfile = '%s.bam' % sortedfile # samtools does that
|
|
753 cl.append('O=%s' % pic.metricsOut)
|
|
754 cl.append('TMP_DIR=%s' % opts.tmpdir)
|
|
755 cl.append('I=%s' % sortedfile)
|
|
756 opts.maxerrors = '99999999'
|
|
757 cl.append('MAX_OUTPUT=%s' % opts.maxerrors)
|
|
758 if opts.ignoreflags[0] <> 'None': # picard error values to ignore
|
|
759 igs = ['IGNORE=%s' % x for x in opts.ignoreflags if x <> 'None']
|
|
760 cl.append(' '.join(igs))
|
|
761 if opts.bisulphite.lower() <> 'false':
|
|
762 cl.append('IS_BISULFITE_SEQUENCED=true')
|
|
763 if opts.ref <> None or opts.ref_file <> None:
|
|
764 cl.append('R=%s' % ref_file_name)
|
|
765 stdouts,rval = pic.runPic(opts.jar,cl)
|
|
766 if opts.datatype == 'sam':
|
|
767 pic.delme.append(tempbam)
|
|
768 newsam = opts.output
|
|
769 outformat = 'bam'
|
|
770 pe = open(pic.metricsOut,'r').readlines()
|
|
771 pic.cleanSam(insam=sortedfile, newsam=newsam, picardErrors=pe,outformat=outformat)
|
|
772 pic.delme.append(sortedfile) # not wanted
|
|
773 stf.close()
|
|
774 pic.cleanup()
|
|
775
|
|
776 ####liubo added CleanSam tool####
|
|
777 elif pic.picname == 'CleanSam':
|
|
778 # input
|
|
779 cl.append('INPUT=%s' % opts.input)
|
|
780 # output
|
|
781 cl.append('OUTPUT=%s' % tempout)
|
|
782 stdouts,rval = pic.runPic(opts.jar, cl)
|
|
783 haveTempout = True
|
|
784
|
|
785 elif pic.picname == 'SortSam':
|
|
786 cl.append('I=%s' % opts.input)
|
|
787 cl.append('O=%s' % tempout)
|
|
788 cl.append('SORT_ORDER=%s' % opts.sortorder)
|
|
789 stdouts,rval = pic.runPic(opts.jar,cl)
|
|
790 haveTempout = True
|
|
791
|
|
792 elif pic.picname == 'BuildBamIndex':
|
|
793 cl.append('I=%s' % opts.input)
|
|
794 cl.append('O=%s' % tempout)
|
|
795 stdouts,rval = pic.runPic(opts.jar,cl)
|
|
796 haveTempout = True
|
|
797
|
|
798 elif pic.picname == 'SamFormatConverter':
|
|
799 cl.append('INPUT=%s' % opts.input)
|
|
800 cl.append('OUTPUT=%s' % tempout)
|
|
801 pic.runPic(opts.jar, cl)
|
|
802 haveTempout = True
|
|
803 elif pic.picname == "DownsampleSam":
|
|
804 cl.append('I=%s' % opts.input)
|
|
805 mystring = opts.output
|
|
806 mystringsam = mystring + ".sam"
|
|
807 cl.append('O=%s' % mystringsam)
|
|
808 if float(opts.probability) > 0:
|
|
809 cl.append('PROBABILITY=%s' % opts.probability)
|
|
810 if float(opts.seed) > 0:
|
|
811 cl.append('RANDOM_SEED=%s' % opts.seed)
|
|
812 stdouts,rval = pic.runPic(opts.jar, cl)
|
|
813 myoutput = mystringsam.replace(".sam", "")
|
|
814 os.rename(mystringsam,myoutput)
|
|
815
|
|
816 elif pic.picname == 'MeanQualityByCycle':
|
|
817 isPDF = 'MeanQualityByCycle.pdf'
|
|
818 pdfpath = os.path.join(opts.outdir,isPDF)
|
|
819 histpdf = isPDF
|
|
820 cl.append('I=%s' % opts.input)
|
|
821 cl.append('O=%s' % pic.metricsOut)
|
|
822 cl.append('CHART_OUTPUT=%s' % histpdf)
|
|
823 cl.append('ASSUME_SORTED=%s' % (opts.assumesorted))
|
|
824 cl.append('ALIGNED_READS_ONLY=%s' % (opts.alignedreadsonly))
|
|
825 cl.append('PF_READS_ONLY=%s' % (opts.pfreadsonly))
|
|
826 stdouts,rval = pic.runPic(opts.jar, cl)
|
|
827 if os.path.exists(pdfpath): # automake thumbnail - will be added to html
|
|
828 cl2 = ['mogrify', '-format jpg -resize x400 %s' % pdfpath]
|
|
829 pic.runCL(cl=cl2,output_dir=opts.outdir)
|
|
830 else:
|
|
831 s = 'Unable to find expected pdf file %s<br/>\n' % pdfpath
|
|
832 logging.info(s)
|
|
833 if len(stdouts) > 0:
|
|
834 logging.info(stdouts)
|
|
835
|
|
836 elif pic.picname == 'CollectRnaSeqMetrics':
|
|
837 isPDF = pic.picname + '.pdf'
|
|
838 pdfpath = os.path.join(opts.outdir,isPDF)
|
|
839 histpdf = isPDF
|
|
840 cl.append('I=%s' % opts.input)
|
|
841 cl.append('O=%s' % pic.metricsOut)
|
|
842 cl.append('CHART_OUTPUT=%s' % histpdf)
|
|
843 cl.append('ASSUME_SORTED=%s' % (opts.assumesorted))
|
|
844 cl.append('MINIMUM_LENGTH=%s' % (opts.minlength))
|
|
845 cl.append('REF_FLAT=%s' % (opts.refflat))
|
|
846 cl.append('STRAND_SPECIFICITY=%s' % (opts.strandspecificity))
|
|
847 cl.append('RIBOSOMAL_INTERVALS=%s' % (opts.ribosomalintervals))
|
|
848 cl.append('RRNA_FRAGMENT_PERCENTAGE=%s' % (opts.rrnafragmentpercentage))
|
|
849 if opts.ignoreseq: # picard error values to ignore
|
|
850 igs = ['IGNORE_SEQUENCE=%s' % x for x in opts.ignoreseq if x <> 'None']
|
|
851 cl.append(' '.join(igs))
|
|
852 if opts.malevel:
|
|
853 malists = opts.malevel.split(',')
|
|
854 malist = ['METRIC_ACCUMULATION_LEVEL=%s' % x for x in malists]
|
|
855 cl += malist
|
|
856 stdouts,rval = pic.runPic(opts.jar, cl)
|
|
857 if os.path.exists(pdfpath): # automake thumbnail - will be added to html
|
|
858 cl2 = ['mogrify', '-format jpg -resize x400 %s' % pdfpath]
|
|
859 pic.runCL(cl=cl2,output_dir=opts.outdir)
|
|
860 else:
|
|
861 s = 'Unable to find expected pdf file %s<br/>\n' % pdfpath
|
|
862 logging.info(s)
|
|
863 if len(stdouts) > 0:
|
|
864 logging.info(stdouts)
|
|
865
|
|
866 else:
|
|
867 print >> sys.stderr,'picard.py got an unknown tool name - %s' % pic.picname
|
|
868 sys.exit(1)
|
|
869 if haveTempout:
|
|
870 # Some Picard tools produced a potentially intermediate bam file.
|
|
871 # Either just move to final location or create sam
|
|
872 if os.path.exists(tempout):
|
|
873 shutil.move(tempout, os.path.abspath(opts.output))
|
|
874 if opts.htmlout <> None or doFix: # return a pretty html page
|
|
875 pic.fixPicardOutputs(transpose=doTranspose,maxloglines=maxloglines)
|
|
876 if rval <> 0:
|
|
877 print >> sys.stderr, '## exit code=%d; stdout=%s' % (rval,stdouts)
|
|
878 # signal failure
|
|
# Script entry point: delegate all work to __main__(), the Picard tool
# dispatch driver defined earlier in this file.
if __name__=="__main__": __main__()
|