comparison: picard_wrapper.py @ 42:c25cd3e674a5 (draft), repository devteam / picard1106

description | Uploaded
author | devteam
date | Thu, 13 Feb 2014 22:02:05 -0500
parents | 1f4e2e13d9dc
children |
41:468673afa348 (left column) | 42:c25cd3e674a5 (right column)
---|---
13 galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> | 13 galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?> |
14 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> | 14 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
15 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | 15 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> |
16 <head> | 16 <head> |
17 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | 17 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> |
18 <meta name="generator" content="Bluefish 2.2.3" /> | 18 <meta name="generator" content="Galaxy %s tool output - see http://getgalaxy.org/" /> |
19 <title></title> | 19 <title></title> |
20 <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> | 20 <link rel="stylesheet" href="/static/style/base.css" type="text/css" /> |
21 </head> | 21 </head> |
22 <body> | 22 <body> |
23 <div class="document"> | 23 <div class="document"> |
31 sys.exit() | 31 sys.exit() |
32 | 32 |
33 | 33 |
34 def timenow(): | 34 def timenow(): |
35 """return current time as a string | 35 """return current time as a string |
36 """ | 36 """ |
37 return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time())) | 37 return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time())) |
38 | 38 |
39 | 39 |
40 class PicardBase(): | 40 class PicardBase(): |
41 """ | 41 """ |
42 simple base class with some utilities for Picard | 42 simple base class with some utilities for Picard |
43 adapted and merged with Kelly Vincent's code april 2011 Ross | 43 adapted and merged with Kelly Vincent's code april 2011 Ross |
44 lots of changes... | 44 lots of changes... |
45 """ | 45 """ |
46 | 46 |
47 def __init__(self, opts=None,arg0=None): | 47 def __init__(self, opts=None,arg0=None): |
48 """ common stuff needed at init for a picard tool | 48 """ common stuff needed at init for a picard tool |
49 """ | 49 """ |
50 assert opts <> None, 'PicardBase needs opts at init' | 50 assert opts <> None, 'PicardBase needs opts at init' |
51 self.opts = opts | 51 self.opts = opts |
52 if self.opts.outdir == None: | 52 if self.opts.outdir == None: |
53 self.opts.outdir = os.getcwd() # fixmate has no html file eg so use temp dir | 53 self.opts.outdir = os.getcwd() # fixmate has no html file eg so use temp dir |
54 assert self.opts.outdir <> None,'## PicardBase needs a temp directory if no output directory passed in' | 54 assert self.opts.outdir <> None,'## PicardBase needs a temp directory if no output directory passed in' |
67 try: | 67 try: |
68 os.makedirs(opts.tmpdir) | 68 os.makedirs(opts.tmpdir) |
69 except: | 69 except: |
70 pass | 70 pass |
71 self.log_filename = os.path.join(self.opts.outdir,'%s.log' % self.picname) | 71 self.log_filename = os.path.join(self.opts.outdir,'%s.log' % self.picname) |
72 self.metricsOut = os.path.join(opts.outdir,'%s.metrics.txt' % self.picname) | 72 self.metricsOut = os.path.join(opts.outdir,'%s.metrics.txt' % self.picname) |
73 self.setLogging(logfname=self.log_filename) | 73 self.setLogging(logfname=self.log_filename) |
74 | 74 |
75 def baseName(self,name=None): | 75 def baseName(self,name=None): |
76 return os.path.splitext(os.path.basename(name))[0] | 76 return os.path.splitext(os.path.basename(name))[0] |
77 | 77 |
78 def setLogging(self,logfname="picard_wrapper.log"): | 78 def setLogging(self,logfname="picard_wrapper.log"): |
79 """setup a logger | 79 """setup a logger |
80 """ | 80 """ |
81 logging.basicConfig(level=logging.INFO, | 81 logging.basicConfig(level=logging.INFO, |
82 filename=logfname, | 82 filename=logfname, |
83 filemode='a') | 83 filemode='a') |
84 | 84 |
85 | 85 |
86 def readLarge(self,fname=None): | 86 def readLarge(self,fname=None): |
87 """ read a potentially huge file. | 87 """ read a potentially huge file. |
88 """ | 88 """ |
89 try: | 89 try: |
90 # get stderr, allowing for case where it's very large | 90 # get stderr, allowing for case where it's very large |
91 tmp = open( fname, 'rb' ) | 91 tmp = open( fname, 'rb' ) |
92 s = '' | 92 s = '' |
93 buffsize = 1048576 | 93 buffsize = 1048576 |
100 break | 100 break |
101 except OverflowError: | 101 except OverflowError: |
102 pass | 102 pass |
103 tmp.close() | 103 tmp.close() |
104 except Exception, e: | 104 except Exception, e: |
105 stop_err( 'Read Large Exception : %s' % str( e ) ) | 105 stop_err( 'Read Large Exception : %s' % str( e ) ) |
106 return s | 106 return s |
107 | 107 |
108 def runCL(self,cl=None,output_dir=None): | 108 def runCL(self,cl=None,output_dir=None): |
109 """ construct and run a command line | 109 """ construct and run a command line |
110 we have galaxy's temp path as opt.temp_dir so don't really need isolation | 110 we have galaxy's temp path as opt.temp_dir so don't really need isolation |
111 sometimes stdout is needed as the output - ugly hacks to deal with potentially vast artifacts | 111 sometimes stdout is needed as the output - ugly hacks to deal with potentially vast artifacts |
112 """ | 112 """ |
113 assert cl <> None, 'PicardBase runCL needs a command line as cl' | 113 assert cl <> None, 'PicardBase runCL needs a command line as cl' |
114 if output_dir == None: | 114 if output_dir == None: |
115 output_dir = self.opts.outdir | 115 output_dir = self.opts.outdir |
116 if type(cl) == type([]): | 116 if type(cl) == type([]): |
117 cl = ' '.join(cl) | 117 cl = ' '.join(cl) |
122 process = subprocess.Popen(cl, shell=True, stderr=tef, stdout=tlf, cwd=output_dir) | 122 process = subprocess.Popen(cl, shell=True, stderr=tef, stdout=tlf, cwd=output_dir) |
123 rval = process.wait() | 123 rval = process.wait() |
124 tlf.close() | 124 tlf.close() |
125 tef.close() | 125 tef.close() |
126 stderrs = self.readLarge(temperr) | 126 stderrs = self.readLarge(temperr) |
127 stdouts = self.readLarge(templog) | 127 stdouts = self.readLarge(templog) |
128 if rval > 0: | 128 if rval > 0: |
129 s = '## executing %s returned status %d and stderr: \n%s\n' % (cl,rval,stderrs) | 129 s = '## executing %s returned status %d and stderr: \n%s\n' % (cl,rval,stderrs) |
130 stdouts = '%s\n%s' % (stdouts,stderrs) | 130 stdouts = '%s\n%s' % (stdouts,stderrs) |
131 else: | 131 else: |
132 s = '## executing %s returned status %d and nothing on stderr\n' % (cl,rval) | 132 s = '## executing %s returned status %d and nothing on stderr\n' % (cl,rval) |
133 logging.info(s) | 133 logging.info(s) |
134 os.unlink(templog) # always | 134 os.unlink(templog) # always |
135 os.unlink(temperr) # always | 135 os.unlink(temperr) # always |
136 return s, stdouts, rval # sometimes s is an output | 136 return s, stdouts, rval # sometimes s is an output |
137 | 137 |
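For orientation, a minimal sketch of how callers elsewhere in this file drive runCL, assuming a PicardBase instance named pic built from the parsed options; the samtools command and file names are illustrative, not taken from the wrapper.

    # hypothetical caller of PicardBase.runCL; command and paths are illustrative only
    tlog, stdouts, rval = pic.runCL(cl=['samtools', 'index', 'input.bam'], output_dir=opts.outdir)
    if rval != 0:
        stop_err('samtools index failed with status %d:\n%s' % (rval, stdouts))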
138 def runPic(self, jar, cl): | 138 def runPic(self, jar, cl): |
139 """ | 139 """ |
140 cl should be everything after the jar file name in the command | 140 cl should be everything after the jar file name in the command |
141 """ | 141 """ |
142 runme = ['java -Xmx%s' % self.opts.maxjheap] | 142 runme = ['java -Xmx%s' % self.opts.maxjheap] |
143 runme.append(" -Djava.io.tmpdir='%s' " % self.opts.tmpdir) | 143 runme.append(" -Djava.io.tmpdir='%s' " % self.opts.tmpdir) |
144 runme.append('-jar %s' % jar) | 144 runme.append('-jar %s' % jar) |
145 runme += cl | 145 runme += cl |
146 | |
147 print runme,self.opts.outdir | |
148 | |
146 s,stdouts,rval = self.runCL(cl=runme, output_dir=self.opts.outdir) | 149 s,stdouts,rval = self.runCL(cl=runme, output_dir=self.opts.outdir) |
147 return stdouts,rval | 150 return stdouts,rval |
148 | 151 |
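runPic simply prepends java -Xmx<maxjheap> -Djava.io.tmpdir=<tmpdir> -jar <jar> to whatever Picard KEY=VALUE arguments a branch has collected. A hedged sketch of such a call; the jar path and arguments are illustrative and only mirror the MarkDuplicates branch further down.

    # hypothetical runPic call; jar path and Picard KEY=VALUE arguments are illustrative
    cl = ['ASSUME_SORTED=true',
          'INPUT=%s' % opts.input,
          'OUTPUT=%s' % opts.output,
          'METRICS_FILE=%s' % pic.metricsOut]
    stdouts, rval = pic.runPic('/path/to/picard/MarkDuplicates.jar', cl)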
149 def samToBam(self,infile=None,outdir=None): | 152 def samToBam(self,infile=None,outdir=None): |
150 """ | 153 """ |
151 use samtools view to convert sam to bam | 154 use samtools view to convert sam to bam |
152 """ | 155 """ |
153 fd,tempbam = tempfile.mkstemp(dir=outdir,suffix='rgutilsTemp.bam') | 156 fd,tempbam = tempfile.mkstemp(dir=outdir,suffix='rgutilsTemp.bam') |
154 cl = ['samtools view -h -b -S -o ',tempbam,infile] | 157 cl = ['samtools view -h -b -S -o ',tempbam,infile] |
155 tlog,stdouts,rval = self.runCL(cl,outdir) | 158 tlog,stdouts,rval = self.runCL(cl,outdir) |
156 return tlog,tempbam,rval | 159 return tlog,tempbam,rval |
157 | 160 |
158 def sortSam(self, infile=None,outfile=None,outdir=None): | 161 def sortSam(self, infile=None,outfile=None,outdir=None): |
159 """ | 162 """ |
160 """ | 163 """ |
161 print '## sortSam got infile=%s,outfile=%s,outdir=%s' % (infile,outfile,outdir) | 164 print '## sortSam got infile=%s,outfile=%s,outdir=%s' % (infile,outfile,outdir) |
162 cl = ['samtools sort',infile,outfile] | 165 cl = ['samtools sort',infile,outfile] |
163 tlog,stdouts,rval = self.runCL(cl,outdir) | 166 tlog,stdouts,rval = self.runCL(cl,outdir) |
164 return tlog | 167 return tlog |
165 | 168 |
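A short usage sketch of the two samtools helpers above, mirroring how the main routine calls them; the file names are made up, and the trailing '.bam' reflects the samtools 0.1.x sort syntax (output prefix argument) that this wrapper assumes.

    # illustrative use of samToBam/sortSam; paths are made up
    tlog, tempbam, rval = pic.samToBam(infile='reads.sam', outdir=opts.outdir)
    tlog = pic.sortSam(infile=tempbam, outfile='reads.sorted', outdir=opts.outdir)
    sortedbam = 'reads.sorted.bam'  # samtools sort appends .bam to the supplied prefix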
170 except: | 173 except: |
171 pass | 174 pass |
172 | 175 |
173 def prettyPicout(self,transpose,maxrows): | 176 def prettyPicout(self,transpose,maxrows): |
174 """organize picard outpouts into a report html page | 177 """organize picard outpouts into a report html page |
175 """ | 178 """ |
176 res = [] | 179 res = [] |
177 try: | 180 try: |
178 r = open(self.metricsOut,'r').readlines() | 181 r = open(self.metricsOut,'r').readlines() |
179 except: | 182 except: |
180 r = [] | 183 r = [] |
181 if len(r) > 0: | 184 if len(r) > 0: |
182 res.append('<b>Picard on line resources</b><ul>\n') | 185 res.append('<b>Picard on line resources</b><ul>\n') |
183 res.append('<li><a href="http://picard.sourceforge.net/index.shtml">Click here for Picard Documentation</a></li>\n') | 186 res.append('<li><a href="http://picard.sourceforge.net/index.shtml">Click here for Picard Documentation</a></li>\n') |
184 res.append('<li><a href="http://picard.sourceforge.net/picard-metric-definitions.shtml">Click here for Picard Metrics definitions</a></li></ul><hr/>\n') | 187 res.append('<li><a href="http://picard.sourceforge.net/picard-metric-definitions.shtml">Click here for Picard Metrics definitions</a></li></ul><hr/>\n') |
185 if transpose: | 188 if transpose: |
186 res.append('<b>Picard output (transposed to make it easier to see)</b><hr/>\n') | 189 res.append('<b>Picard output (transposed to make it easier to see)</b><hr/>\n') |
187 else: | 190 else: |
188 res.append('<b>Picard output</b><hr/>\n') | 191 res.append('<b>Picard output</b><hr/>\n') |
189 res.append('<table cellpadding="3" >\n') | 192 res.append('<table cellpadding="3" >\n') |
190 dat = [] | 193 dat = [] |
191 heads = [] | 194 heads = [] |
192 lastr = len(r) - 1 | 195 lastr = len(r) - 1 |
193 # special case for estimate library complexity hist | 196 # special case for estimate library complexity hist |
206 res += hres | 209 res += hres |
207 heads = [] | 210 heads = [] |
208 if len(dat) > 0: | 211 if len(dat) > 0: |
209 if transpose and not thist: | 212 if transpose and not thist: |
210 tdat = map(None,*dat) # transpose an arbitrary list of lists | 213 tdat = map(None,*dat) # transpose an arbitrary list of lists |
211 tdat = ['<tr class="d%d"><td>%s</td><td>%s </td></tr>\n' % ((i+len(heads)) % 2,x[0],x[1]) for i,x in enumerate(tdat)] | 214 tdat = ['<tr class="d%d"><td>%s</td><td>%s </td></tr>\n' % ((i+len(heads)) % 2,x[0],x[1]) for i,x in enumerate(tdat)] |
212 else: | 215 else: |
213 tdat = ['\t'.join(x).strip() for x in dat] # back to strings :( | 216 tdat = ['\t'.join(x).strip() for x in dat] # back to strings :( |
214 tdat = ['<tr class="d%d"><td colspan="2">%s</td></tr>\n' % ((i+len(heads)) % 2,x) for i,x in enumerate(tdat)] | 217 tdat = ['<tr class="d%d"><td colspan="2">%s</td></tr>\n' % ((i+len(heads)) % 2,x) for i,x in enumerate(tdat)] |
215 res += tdat | 218 res += tdat |
216 dat = [] | 219 dat = [] |
217 res.append('</table>\n') | 220 res.append('</table>\n') |
218 return res | 221 return res |
219 | 222 |
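The map(None, *dat) call above is the Python 2 idiom for transposing a list of rows (it pads ragged rows with None); a tiny illustration with made-up metric values:

    # Python 2 transpose idiom used in prettyPicout; values are made up
    dat = [['CATEGORY', 'TOTAL_READS'], ['PAIR', '2500']]
    tdat = map(None, *dat)
    # tdat == [('CATEGORY', 'PAIR'), ('TOTAL_READS', '2500')]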
220 def fixPicardOutputs(self,transpose,maxloglines): | 223 def fixPicardOutputs(self,transpose,maxloglines): |
221 """ | 224 """ |
222 picard produces long hard to read tab header files | 225 picard produces long hard to read tab header files |
223 make them available but present them transposed for readability | 226 make them available but present them transposed for readability |
224 """ | 227 """ |
225 logging.shutdown() | 228 logging.shutdown() |
226 self.cleanup() # remove temp files stored in delme | 229 self.cleanup() # remove temp files stored in delme |
227 rstyle="""<style type="text/css"> | 230 rstyle="""<style type="text/css"> |
228 tr.d0 td {background-color: oldlace; color: black;} | 231 tr.d0 td {background-color: oldlace; color: black;} |
229 tr.d1 td {background-color: aliceblue; color: black;} | 232 tr.d1 td {background-color: aliceblue; color: black;} |
230 </style>""" | 233 </style>""" |
231 res = [rstyle,] | 234 res = [rstyle,] |
232 res.append(galhtmlprefix % self.progname) | 235 res.append(galhtmlprefix % self.progname) |
233 res.append(galhtmlattr % (self.picname,timenow())) | 236 res.append(galhtmlattr % (self.picname,timenow())) |
234 flist = [x for x in os.listdir(self.opts.outdir) if not x.startswith('.')] | 237 flist = [x for x in os.listdir(self.opts.outdir) if not x.startswith('.')] |
235 pdflist = [x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf'] | 238 pdflist = [x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf'] |
236 if len(pdflist) > 0: # assumes all pdfs come with thumbnail .jpgs | 239 if len(pdflist) > 0: # assumes all pdfs come with thumbnail .jpgs |
237 for p in pdflist: | 240 for p in pdflist: |
238 pbase = os.path.splitext(p)[0] # removes .pdf | 241 pbase = os.path.splitext(p)[0] # removes .pdf |
239 imghref = '%s.jpg' % pbase | 242 imghref = '%s.jpg' % pbase |
240 mimghref = '%s-0.jpg' % pbase # multiple pages pdf -> multiple thumbnails without asking! | 243 mimghref = '%s-0.jpg' % pbase # multiple pages pdf -> multiple thumbnails without asking! |
241 if mimghref in flist: | 244 if mimghref in flist: |
242 imghref=mimghref # only one for thumbnail...it's a multi page pdf | 245 imghref=mimghref # only one for thumbnail...it's a multi page pdf |
243 res.append('<table cellpadding="10"><tr><td>\n') | 246 res.append('<table cellpadding="10"><tr><td>\n') |
244 res.append('<a href="%s"><img src="%s" title="Click image preview for a print quality PDF version" hspace="10" align="middle"></a>\n' % (p,imghref)) | 247 res.append('<a href="%s"><img src="%s" title="Click image preview for a print quality PDF version" hspace="10" align="middle"></a>\n' % (p,imghref)) |
245 res.append('</tr></td></table>\n') | 248 res.append('</tr></td></table>\n') |
246 if len(flist) > 0: | 249 if len(flist) > 0: |
247 res.append('<b>The following output files were created (click the filename to view/download a copy):</b><hr/>') | 250 res.append('<b>The following output files were created (click the filename to view/download a copy):</b><hr/>') |
248 res.append('<table>\n') | 251 res.append('<table>\n') |
249 for i,f in enumerate(flist): | 252 for i,f in enumerate(flist): |
250 fn = os.path.split(f)[-1] | 253 fn = os.path.split(f)[-1] |
251 res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,fn)) | 254 res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,fn)) |
252 res.append('</table><p/>\n') | 255 res.append('</table><p/>\n') |
253 pres = self.prettyPicout(transpose,maxloglines) | 256 pres = self.prettyPicout(transpose,maxloglines) |
254 if len(pres) > 0: | 257 if len(pres) > 0: |
255 res += pres | 258 res += pres |
256 l = open(self.log_filename,'r').readlines() | 259 l = open(self.log_filename,'r').readlines() |
257 llen = len(l) | 260 llen = len(l) |
258 if llen > 0: | 261 if llen > 0: |
259 res.append('<b>Picard Tool Run Log</b><hr/>\n') | 262 res.append('<b>Picard Tool Run Log</b><hr/>\n') |
260 rlog = ['<pre>',] | 263 rlog = ['<pre>',] |
261 if llen > maxloglines: | 264 if llen > maxloglines: |
262 n = min(50,int(maxloglines/2)) | 265 n = min(50,int(maxloglines/2)) |
263 rlog += l[:n] | 266 rlog += l[:n] |
264 rlog.append('------------ ## %d rows deleted ## --------------\n' % (llen-maxloglines)) | 267 rlog.append('------------ ## %d rows deleted ## --------------\n' % (llen-maxloglines)) |
269 if llen > maxloglines: | 272 if llen > maxloglines: |
270 rlog.append('\n<b>## WARNING - %d log lines truncated - <a href="%s">%s</a> contains entire output</b>' % (llen - maxloglines,self.log_filename,self.log_filename)) | 273 rlog.append('\n<b>## WARNING - %d log lines truncated - <a href="%s">%s</a> contains entire output</b>' % (llen - maxloglines,self.log_filename,self.log_filename)) |
271 res += rlog | 274 res += rlog |
272 else: | 275 else: |
273 res.append("### Odd, Picard left no log file %s - must have really barfed badly?\n" % self.log_filename) | 276 res.append("### Odd, Picard left no log file %s - must have really barfed badly?\n" % self.log_filename) |
274 res.append('<hr/>The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> \n') | 277 res.append('<hr/>The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> \n') |
275 res.append( 'generated all outputs reported here running as a <a href="http://getgalaxy.org">Galaxy</a> tool') | 278 res.append( 'generated all outputs reported here running as a <a href="http://getgalaxy.org">Galaxy</a> tool') |
276 res.append(galhtmlpostfix) | 279 res.append(galhtmlpostfix) |
277 outf = open(self.opts.htmlout,'w') | 280 outf = open(self.opts.htmlout,'w') |
278 outf.write(''.join(res)) | 281 outf.write(''.join(res)) |
279 outf.write('\n') | 282 outf.write('\n') |
280 outf.close() | 283 outf.close() |
281 | 284 |
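For context, the metrics files that prettyPicout and fixPicardOutputs parse are plain text: '#'-prefixed header lines, a tab-separated column header row, value rows, and optionally a histogram section. The sketch below is an assumed, simplified shape, not copied from any real run; exact headers and columns vary by tool and Picard version.

    # rough, assumed shape of a Picard metrics file (simplified and illustrative only)
    metrics_example = (
        '## METRICS CLASS\tnet.sf.picard.sam.DuplicationMetrics\n'
        'LIBRARY\tREAD_PAIRS_EXAMINED\tREAD_PAIR_DUPLICATES\n'
        'lib1\t2500\t10\n'
    )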
282 def makePicInterval(self,inbed=None,outf=None): | 285 def makePicInterval(self,inbed=None,outf=None): |
283 """ | 286 """ |
284 picard wants bait and target files to have the same header length as the incoming bam/sam | 287 picard wants bait and target files to have the same header length as the incoming bam/sam |
285 a meaningful (ie accurate) representation will fail because of this - so this hack | 288 a meaningful (ie accurate) representation will fail because of this - so this hack |
286 it would be far better to be able to supply the original bed untouched | 289 it would be far better to be able to supply the original bed untouched |
287 Additional checking added Ross Lazarus Dec 2011 to deal with two 'bug' reports on the list | 290 Additional checking added Ross Lazarus Dec 2011 to deal with two 'bug' reports on the list |
288 """ | 291 """ |
289 assert inbed <> None | 292 assert inbed <> None |
290 bed = open(inbed,'r').readlines() | 293 bed = open(inbed,'r').readlines() |
291 sbed = [x.split('\t') for x in bed] # lengths MUST be 5 | 294 sbed = [x.split('\t') for x in bed] # lengths MUST be 5 |
292 lens = [len(x) for x in sbed] | 295 lens = [len(x) for x in sbed] |
293 strands = [x[3] for x in sbed if not x[3] in ['+','-']] | 296 strands = [x[3] for x in sbed if not x[3] in ['+','-']] |
318 o.close() | 321 o.close() |
319 return outf | 322 return outf |
320 | 323 |
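The checks above require each BED row to carry exactly five tab-separated fields with the strand in column four, matching the Picard interval format that the (elided) body writes out: the input SAM/BAM header followed by chrom, start, end, strand, name records. A hedged illustration with invented coordinates:

    # assumed shape of the interval file handed to Picard (coordinates are invented)
    interval_example = (
        '@HD\tVN:1.0\tSO:coordinate\n'
        '@SQ\tSN:chr1\tLN:249250621\n'
        'chr1\t10001\t10468\t+\ttarget_1\n'
    )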
321 def cleanSam(self, insam=None, newsam=None, picardErrors=[],outformat=None): | 324 def cleanSam(self, insam=None, newsam=None, picardErrors=[],outformat=None): |
322 """ | 325 """ |
323 interesting problem - if paired, must remove mate pair of errors too or we have a new set of errors after cleaning - missing mate pairs! | 326 interesting problem - if paired, must remove mate pair of errors too or we have a new set of errors after cleaning - missing mate pairs! |
324 Do the work of removing all the error sequences | 327 Do the work of removing all the error sequences |
325 pysam is cool | 328 pysam is cool |
326 infile = pysam.Samfile( "-", "r" ) | 329 infile = pysam.Samfile( "-", "r" ) |
327 outfile = pysam.Samfile( "-", "w", template = infile ) | 330 outfile = pysam.Samfile( "-", "w", template = infile ) |
328 for s in infile: outfile.write(s) | 331 for s in infile: outfile.write(s) |
329 | 332 |
330 errors from ValidateSamFile.jar look like | 333 errors from ValidateSamFile.jar look like |
331 WARNING: Record 32, Read name SRR006041.1202260, NM tag (nucleotide differences) is missing | 334 WARNING: Record 32, Read name SRR006041.1202260, NM tag (nucleotide differences) is missing |
332 ERROR: Record 33, Read name SRR006041.1042721, Empty sequence dictionary. | 335 ERROR: Record 33, Read name SRR006041.1042721, Empty sequence dictionary. |
333 ERROR: Record 33, Read name SRR006041.1042721, RG ID on SAMRecord not found in header: SRR006041 | 336 ERROR: Record 33, Read name SRR006041.1042721, RG ID on SAMRecord not found in header: SRR006041 |
334 | 337 |
335 """ | 338 """ |
336 assert os.path.isfile(insam), 'rgPicardValidate cleansam needs an input sam file - cannot find %s' % insam | 339 assert os.path.isfile(insam), 'rgPicardValidate cleansam needs an input sam file - cannot find %s' % insam |
337 assert newsam <> None, 'rgPicardValidate cleansam needs an output new sam file path' | 340 assert newsam <> None, 'rgPicardValidate cleansam needs an output new sam file path' |
338 removeNames = [x.split(',')[1].replace(' Read name ','') for x in picardErrors if len(x.split(',')) > 2] | 341 removeNames = [x.split(',')[1].replace(' Read name ','') for x in picardErrors if len(x.split(',')) > 2] |
339 remDict = dict(zip(removeNames,range(len(removeNames)))) | 342 remDict = dict(zip(removeNames,range(len(removeNames)))) |
340 infile = pysam.Samfile(insam,'rb') | 343 infile = pysam.Samfile(insam,'rb') |
362 | 365 |
363 | 366 |
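A minimal, self-contained pysam sketch of the read-name filtering cleanSam describes; its full body is elided above, so details may differ, the paired-end caveat from the docstring is ignored, and output format handling (the outformat argument) is left out.

    # hedged sketch of the filtering step; the real (elided) cleanSam body may differ
    infile = pysam.Samfile(insam, 'rb')
    outfile = pysam.Samfile(newsam, 'wb', template=infile)
    for read in infile:
        if read.qname not in remDict:  # drop reads named in ValidateSamFile errors
            outfile.write(read)
    outfile.close()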
364 def __main__(): | 367 def __main__(): |
365 doFix = False # tools returning htmlfile don't need this | 368 doFix = False # tools returning htmlfile don't need this |
366 doTranspose = True # default | 369 doTranspose = True # default |
367 maxloglines = 100 # default | 370 maxloglines = 100 # default |
368 #Parse Command Line | 371 #Parse Command Line |
369 op = optparse.OptionParser() | 372 op = optparse.OptionParser() |
370 # All tools | 373 # All tools |
371 op.add_option('-i', '--input', dest='input', help='Input SAM or BAM file' ) | 374 op.add_option('-i', '--input', dest='input', help='Input SAM or BAM file' ) |
372 op.add_option('-e', '--inputext', default=None) | 375 op.add_option('-e', '--inputext', default=None) |
373 op.add_option('-o', '--output', default=None) | 376 op.add_option('-o', '--output', default=None) |
374 op.add_option('-n', '--title', default="Pick a Picard Tool") | 377 op.add_option('-n', '--title', default="Pick a Picard Tool") |
375 op.add_option('-t', '--htmlout', default=None) | 378 op.add_option('-t', '--htmlout', default=None) |
376 op.add_option('-d', '--outdir', default=None) | 379 op.add_option('-d', '--outdir', default=None) |
377 op.add_option('-x', '--maxjheap', default='4g') | 380 op.add_option('-x', '--maxjheap', default='3000m') |
378 op.add_option('-b', '--bisulphite', default='false') | 381 op.add_option('-b', '--bisulphite', default='false') |
379 op.add_option('-s', '--sortorder', default='query') | 382 op.add_option('-s', '--sortorder', default='query') |
380 op.add_option('','--tmpdir', default='/tmp') | 383 op.add_option('','--tmpdir', default='/tmp') |
381 op.add_option('-j','--jar',default='') | 384 op.add_option('-j','--jar',default='') |
382 op.add_option('','--picard-cmd',default=None) | 385 op.add_option('','--picard-cmd',default=None) |
383 # Many tools | 386 # Many tools |
384 op.add_option( '', '--output-format', dest='output_format', help='Output format' ) | 387 op.add_option( '', '--output-format', dest='output_format', help='Output format' ) |
385 op.add_option( '', '--bai-file', dest='bai_file', help='The path to the index file for the input bam file' ) | 388 op.add_option( '', '--bai-file', dest='bai_file', help='The path to the index file for the input bam file' ) |
386 op.add_option( '', '--ref', dest='ref', help='Built-in reference with fasta and dict file', default=None ) | 389 op.add_option( '', '--ref', dest='ref', help='Built-in reference with fasta and dict file', default=None ) |
387 # CreateSequenceDictionary | 390 # CreateSequenceDictionary |
402 op.add_option('', '--maxinsert', default="20") | 405 op.add_option('', '--maxinsert', default="20") |
403 op.add_option('', '--adaptors', default='') | 406 op.add_option('', '--adaptors', default='') |
404 # FixMateInformation and validate | 407 # FixMateInformation and validate |
405 # CollectGcBiasMetrics | 408 # CollectGcBiasMetrics |
406 op.add_option('', '--windowsize', default='100') | 409 op.add_option('', '--windowsize', default='100') |
407 op.add_option('', '--mingenomefrac', default='0.00001') | 410 op.add_option('', '--mingenomefrac', default='0.00001') |
408 # AddOrReplaceReadGroups | 411 # AddOrReplaceReadGroups |
409 op.add_option( '', '--rg-opts', dest='rg_opts', help='Specify extra (optional) arguments with full, otherwise preSet' ) | 412 op.add_option( '', '--rg-opts', dest='rg_opts', help='Specify extra (optional) arguments with full, otherwise preSet' ) |
410 op.add_option( '', '--rg-lb', dest='rg_library', help='Read Group Library' ) | 413 op.add_option( '', '--rg-lb', dest='rg_library', help='Read Group Library' ) |
411 op.add_option( '', '--rg-pl', dest='rg_platform', help='Read Group platform (e.g. illumina, solid)' ) | 414 op.add_option( '', '--rg-pl', dest='rg_platform', help='Read Group platform (e.g. illumina, solid)' ) |
412 op.add_option( '', '--rg-pu', dest='rg_plat_unit', help='Read Group platform unit (eg. run barcode) ' ) | 415 op.add_option( '', '--rg-pu', dest='rg_plat_unit', help='Read Group platform unit (eg. run barcode) ' ) |
418 op.add_option( '', '--allow-inc-dict-concord', dest='allow_inc_dict_concord', help='Allow incomplete dict concordance' ) | 421 op.add_option( '', '--allow-inc-dict-concord', dest='allow_inc_dict_concord', help='Allow incomplete dict concordance' ) |
419 op.add_option( '', '--allow-contig-len-discord', dest='allow_contig_len_discord', help='Allow contig length discordance' ) | 422 op.add_option( '', '--allow-contig-len-discord', dest='allow_contig_len_discord', help='Allow contig length discordance' ) |
420 # ReplaceSamHeader | 423 # ReplaceSamHeader |
421 op.add_option( '', '--header-file', dest='header_file', help='sam or bam file from which header will be read' ) | 424 op.add_option( '', '--header-file', dest='header_file', help='sam or bam file from which header will be read' ) |
422 | 425 |
423 op.add_option('','--assumesorted', default='true') | 426 op.add_option('','--assumesorted', default='true') |
424 op.add_option('','--readregex', default="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*") | 427 op.add_option('','--readregex', default="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*") |
425 #estimatelibrarycomplexity | 428 #estimatelibrarycomplexity |
426 op.add_option('','--minid', default="5") | 429 op.add_option('','--minid', default="5") |
427 op.add_option('','--maxdiff', default="0.03") | 430 op.add_option('','--maxdiff', default="0.03") |
428 op.add_option('','--minmeanq', default="20") | 431 op.add_option('','--minmeanq', default="20") |
437 op.add_option('','--samout', default=None) | 440 op.add_option('','--samout', default=None) |
438 #downsample | 441 #downsample |
439 op.add_option('','--probability', default="1") | 442 op.add_option('','--probability', default="1") |
440 op.add_option('','--seed', default="1") | 443 op.add_option('','--seed', default="1") |
441 | 444 |
442 ## | |
443 opts, args = op.parse_args() | 445 opts, args = op.parse_args() |
444 opts.sortme = opts.assumesorted == 'false' | 446 opts.sortme = opts.assumesorted == 'false' |
445 assert opts.input <> None | 447 assert opts.input <> None |
446 # need to add | 448 # need to add |
447 # instance that does all the work | 449 # instance that does all the work |
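The code elided after this point builds the worker instance and the per-tool argument list that the branches below extend. A heavily hedged sketch of that glue; every value that is not visible elsewhere in this file is an assumption.

    # hedged sketch of the elided setup between option parsing and the tool branches
    pic = PicardBase(opts, sys.argv[0])             # pic.picname drives the dispatch below
    cl = ['VALIDATION_STRINGENCY=LENIENT']          # assumed common Picard argument
    tempout = os.path.join(opts.outdir, 'tempout')  # scratch output some branches write to (name assumed)
    haveTempout = False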
540 if float(opts.optdupdist) > 0: | 542 if float(opts.optdupdist) > 0: |
541 cl.append('OPTICAL_DUPLICATE_PIXEL_DISTANCE=%s' % opts.optdupdist) | 543 cl.append('OPTICAL_DUPLICATE_PIXEL_DISTANCE=%s' % opts.optdupdist) |
542 stdouts,rval = pic.runPic(opts.jar, cl) | 544 stdouts,rval = pic.runPic(opts.jar, cl) |
543 | 545 |
544 elif pic.picname == 'CollectAlignmentSummaryMetrics': | 546 elif pic.picname == 'CollectAlignmentSummaryMetrics': |
545 # Why do we do this fakefasta thing? | 547 # Why do we do this fakefasta thing? |
546 # Because we need NO fai to be available or picard barfs unless it matches the input data. | 548 # Because we need NO fai to be available or picard barfs unless it matches the input data. |
547 # why? Dunno Seems to work without complaining if the .bai file is AWOL.... | 549 # why? Dunno Seems to work without complaining if the .bai file is AWOL.... |
548 fakefasta = os.path.join(opts.outdir,'%s_fake.fasta' % os.path.basename(ref_file_name)) | 550 fakefasta = os.path.join(opts.outdir,'%s_fake.fasta' % os.path.basename(ref_file_name)) |
549 try: | 551 try: |
550 os.symlink(ref_file_name,fakefasta) | 552 os.symlink(ref_file_name,fakefasta) |
562 cl.append('OUTPUT=%s' % pic.metricsOut) | 564 cl.append('OUTPUT=%s' % pic.metricsOut) |
563 cl.append('R=%s' % fakefasta) | 565 cl.append('R=%s' % fakefasta) |
564 cl.append('TMP_DIR=%s' % opts.tmpdir) | 566 cl.append('TMP_DIR=%s' % opts.tmpdir) |
565 if not opts.assumesorted.lower() == 'true': # we need to sort input | 567 if not opts.assumesorted.lower() == 'true': # we need to sort input |
566 sortedfile = '%s.sorted' % os.path.basename(opts.input) | 568 sortedfile = '%s.sorted' % os.path.basename(opts.input) |
567 if opts.datatype == 'sam': # need to work with a bam | 569 if opts.datatype == 'sam': # need to work with a bam |
568 tlog,tempbam,trval = pic.samToBam(opts.input,opts.outdir) | 570 tlog,tempbam,trval = pic.samToBam(opts.input,opts.outdir) |
569 pic.delme.append(tempbam) | 571 pic.delme.append(tempbam) |
570 try: | 572 try: |
571 tlog = pic.sortSam(tempbam,sortedfile,opts.outdir) | 573 tlog = pic.sortSam(tempbam,sortedfile,opts.outdir) |
572 except: | 574 except: |
577 except : # bug - [bam_sort_core] not being ignored - TODO fixme | 579 except : # bug - [bam_sort_core] not being ignored - TODO fixme |
578 print '## exception %s on sorting bam file %s' % (sys.exc_info()[0],opts.input) | 580 print '## exception %s on sorting bam file %s' % (sys.exc_info()[0],opts.input) |
579 cl.append('INPUT=%s.bam' % os.path.abspath(os.path.join(opts.outdir,sortedfile))) | 581 cl.append('INPUT=%s.bam' % os.path.abspath(os.path.join(opts.outdir,sortedfile))) |
580 pic.delme.append(os.path.join(opts.outdir,sortedfile)) | 582 pic.delme.append(os.path.join(opts.outdir,sortedfile)) |
581 else: | 583 else: |
582 cl.append('INPUT=%s' % os.path.abspath(opts.input)) | 584 cl.append('INPUT=%s' % os.path.abspath(opts.input)) |
583 stdouts,rval = pic.runPic(opts.jar, cl) | 585 stdouts,rval = pic.runPic(opts.jar, cl) |
584 | 586 |
585 | 587 |
586 elif pic.picname == 'CollectGcBiasMetrics': | 588 elif pic.picname == 'CollectGcBiasMetrics': |
587 assert os.path.isfile(ref_file_name),'PicardGC needs a reference sequence - cannot read %s' % ref_file_name | 589 assert os.path.isfile(ref_file_name),'PicardGC needs a reference sequence - cannot read %s' % ref_file_name |
588 # sigh. Why do we do this fakefasta thing? Because we need NO fai to be available or picard barfs unless it has the same length as the input data. | 590 # sigh. Why do we do this fakefasta thing? Because we need NO fai to be available or picard barfs unless it has the same length as the input data. |
589 # why? Dunno | 591 # why? Dunno |
590 fakefasta = os.path.join(opts.outdir,'%s_fake.fasta' % os.path.basename(ref_file_name)) | 592 fakefasta = os.path.join(opts.outdir,'%s_fake.fasta' % os.path.basename(ref_file_name)) |
591 try: | 593 try: |
592 os.symlink(ref_file_name,fakefasta) | 594 os.symlink(ref_file_name,fakefasta) |
593 except: | 595 except: |
594 s = '## unable to symlink %s to %s - different devices? May need to replace with shutil.copy' | 596 s = '## unable to symlink %s to %s - different devices? May need to replace with shutil.copy' |
619 lf.write('\n') | 621 lf.write('\n') |
620 lf.close() | 622 lf.close() |
621 | 623 |
622 elif pic.picname == 'CollectInsertSizeMetrics': | 624 elif pic.picname == 'CollectInsertSizeMetrics': |
623 """ <command interpreter="python"> | 625 """ <command interpreter="python"> |
624 picard_wrapper.py -i "$input_file" -n "$out_prefix" --tmpdir "${__new_file_path__}" --deviations "$deviations" | 626 picard_wrapper.py -i "$input_file" -n "$out_prefix" --tmpdir "${__new_file_path__}" --deviations "$deviations" |
625 --histwidth "$histWidth" --minpct "$minPct" --malevel "$malevel" | 627 --histwidth "$histWidth" --minpct "$minPct" --malevel "$malevel" |
626 -j "${GALAXY_DATA_INDEX_DIR}/shared/jars/picard/CollectInsertSizeMetrics.jar" -d "$html_file.files_path" -t "$html_file" | 628 -j "${GALAXY_DATA_INDEX_DIR}/shared/jars/picard/CollectInsertSizeMetrics.jar" -d "$html_file.files_path" -t "$html_file" |
627 </command> | 629 </command> |
628 """ | 630 """ |
629 isPDF = 'InsertSizeHist.pdf' | 631 isPDF = 'InsertSizeHist.pdf' |
630 pdfpath = os.path.join(opts.outdir,isPDF) | 632 pdfpath = os.path.join(opts.outdir,isPDF) |
631 histpdf = 'InsertSizeHist.pdf' | 633 histpdf = 'InsertSizeHist.pdf' |
632 cl.append('I=%s' % opts.input) | 634 cl.append('I=%s' % opts.input) |
633 cl.append('O=%s' % pic.metricsOut) | 635 cl.append('O=%s' % pic.metricsOut) |
634 cl.append('HISTOGRAM_FILE=%s' % histpdf) | 636 cl.append('HISTOGRAM_FILE=%s' % histpdf) |
635 #if opts.taillimit <> '0': # this was deprecated although still mentioned in the docs at 1.56 | 637 #if opts.taillimit <> '0': # this was deprecated although still mentioned in the docs at 1.56 |
636 # cl.append('TAIL_LIMIT=%s' % opts.taillimit) | 638 # cl.append('TAIL_LIMIT=%s' % opts.taillimit) |
637 if opts.histwidth <> '0': | 639 if opts.histwidth <> '0': |
638 cl.append('HISTOGRAM_WIDTH=%s' % opts.histwidth) | 640 cl.append('HISTOGRAM_WIDTH=%s' % opts.histwidth) |
639 if float( opts.minpct) > 0.0: | 641 if float( opts.minpct) > 0.0: |
640 cl.append('MINIMUM_PCT=%s' % opts.minpct) | 642 cl.append('MINIMUM_PCT=%s' % opts.minpct) |
641 if float(opts.deviations) > 0.0: | 643 if float(opts.deviations) > 0.0: |
642 cl.append('DEVIATIONS=%s' % opts.deviations) | 644 cl.append('DEVIATIONS=%s' % opts.deviations) |
643 if opts.malevel: | 645 if opts.malevel: |
644 malists = opts.malevel.split(',') | 646 malists = opts.malevel.split(',') |
645 malist = ['METRIC_ACCUMULATION_LEVEL=%s' % x for x in malists] | 647 malist = ['METRIC_ACCUMULATION_LEVEL=%s' % x for x in malists] |
646 cl += malist | 648 cl += malist |
647 stdouts,rval = pic.runPic(opts.jar, cl) | 649 stdouts,rval = pic.runPic(opts.jar, cl) |
648 if os.path.exists(pdfpath): # automake thumbnail - will be added to html | 650 if os.path.exists(pdfpath): # automake thumbnail - will be added to html |
649 cl2 = ['mogrify', '-format jpg -resize x400 %s' % pdfpath] | 651 cl2 = ['mogrify', '-format jpg -resize x400 %s' % pdfpath] |
650 pic.runCL(cl=cl2,output_dir=opts.outdir) | 652 pic.runCL(cl=cl2,output_dir=opts.outdir) |
651 else: | 653 else: |
652 s = 'Unable to find expected pdf file %s<br/>\n' % pdfpath | 654 s = 'Unable to find expected pdf file %s<br/>\n' % pdfpath |
653 s += 'This <b>always happens if single ended data was provided</b> to this tool,\n' | 655 s += 'This <b>always happens if single ended data was provided</b> to this tool,\n' |
661 # assume sorted even if header says otherwise | 663 # assume sorted even if header says otherwise |
662 cl.append('ASSUME_SORTED=%s' % (opts.assumesorted)) | 664 cl.append('ASSUME_SORTED=%s' % (opts.assumesorted)) |
663 # input | 665 # input |
664 cl.append('INPUT=%s' % opts.input) | 666 cl.append('INPUT=%s' % opts.input) |
665 # outputs | 667 # outputs |
666 cl.append('OUTPUT=%s' % opts.output) | 668 cl.append('OUTPUT=%s' % opts.output) |
667 cl.append('METRICS_FILE=%s' % pic.metricsOut ) | 669 cl.append('METRICS_FILE=%s' % pic.metricsOut ) |
668 # remove or mark duplicates | 670 # remove or mark duplicates |
669 cl.append('REMOVE_DUPLICATES=%s' % opts.remdups) | 671 cl.append('REMOVE_DUPLICATES=%s' % opts.remdups) |
670 # the regular expression to be used to parse reads in incoming SAM file | 672 # the regular expression to be used to parse reads in incoming SAM file |
671 cl.append('READ_NAME_REGEX="%s"' % opts.readregex) | 673 cl.append('READ_NAME_REGEX="%s"' % opts.readregex) |
709 targetfname = os.path.join(opts.outdir,'rgPicardHsMetrics.target') | 711 targetfname = os.path.join(opts.outdir,'rgPicardHsMetrics.target') |
710 baitf = pic.makePicInterval(opts.baitbed,baitfname) | 712 baitf = pic.makePicInterval(opts.baitbed,baitfname) |
711 if opts.targetbed == opts.baitbed: # same file sometimes | 713 if opts.targetbed == opts.baitbed: # same file sometimes |
712 targetf = baitf | 714 targetf = baitf |
713 else: | 715 else: |
714 targetf = pic.makePicInterval(opts.targetbed,targetfname) | 716 targetf = pic.makePicInterval(opts.targetbed,targetfname) |
715 cl.append('BAIT_INTERVALS=%s' % baitf) | 717 cl.append('BAIT_INTERVALS=%s' % baitf) |
716 cl.append('TARGET_INTERVALS=%s' % targetf) | 718 cl.append('TARGET_INTERVALS=%s' % targetf) |
717 cl.append('INPUT=%s' % os.path.abspath(opts.input)) | 719 cl.append('INPUT=%s' % os.path.abspath(opts.input)) |
718 cl.append('OUTPUT=%s' % pic.metricsOut) | 720 cl.append('OUTPUT=%s' % pic.metricsOut) |
719 cl.append('TMP_DIR=%s' % opts.tmpdir) | 721 cl.append('TMP_DIR=%s' % opts.tmpdir) |
723 import pysam | 725 import pysam |
724 doTranspose = False | 726 doTranspose = False |
725 sortedfile = os.path.join(opts.outdir,'rgValidate.sorted') | 727 sortedfile = os.path.join(opts.outdir,'rgValidate.sorted') |
726 stf = open(pic.log_filename,'w') | 728 stf = open(pic.log_filename,'w') |
727 tlog = None | 729 tlog = None |
728 if opts.datatype == 'sam': # need to work with a bam | 730 if opts.datatype == 'sam': # need to work with a bam |
729 tlog,tempbam,rval = pic.samToBam(opts.input,opts.outdir) | 731 tlog,tempbam,rval = pic.samToBam(opts.input,opts.outdir) |
730 try: | 732 try: |
731 tlog = pic.sortSam(tempbam,sortedfile,opts.outdir) | 733 tlog = pic.sortSam(tempbam,sortedfile,opts.outdir) |
732 except: | 734 except: |
733 print '## exception on sorting sam file %s' % opts.input | 735 print '## exception on sorting sam file %s' % opts.input |
738 print '## exception on sorting bam file %s' % opts.input | 740 print '## exception on sorting bam file %s' % opts.input |
739 if tlog: | 741 if tlog: |
740 print '##tlog=',tlog | 742 print '##tlog=',tlog |
741 stf.write(tlog) | 743 stf.write(tlog) |
742 stf.write('\n') | 744 stf.write('\n') |
743 sortedfile = '%s.bam' % sortedfile # samtools does that | 745 sortedfile = '%s.bam' % sortedfile # samtools does that |
744 cl.append('O=%s' % pic.metricsOut) | 746 cl.append('O=%s' % pic.metricsOut) |
745 cl.append('TMP_DIR=%s' % opts.tmpdir) | 747 cl.append('TMP_DIR=%s' % opts.tmpdir) |
746 cl.append('I=%s' % sortedfile) | 748 cl.append('I=%s' % sortedfile) |
747 opts.maxerrors = '99999999' | 749 opts.maxerrors = '99999999' |
748 cl.append('MAX_OUTPUT=%s' % opts.maxerrors) | 750 cl.append('MAX_OUTPUT=%s' % opts.maxerrors) |
750 igs = ['IGNORE=%s' % x for x in opts.ignoreflags if x <> 'None'] | 752 igs = ['IGNORE=%s' % x for x in opts.ignoreflags if x <> 'None'] |
751 cl.append(' '.join(igs)) | 753 cl.append(' '.join(igs)) |
752 if opts.bisulphite.lower() <> 'false': | 754 if opts.bisulphite.lower() <> 'false': |
753 cl.append('IS_BISULFITE_SEQUENCED=true') | 755 cl.append('IS_BISULFITE_SEQUENCED=true') |
754 if opts.ref <> None or opts.ref_file <> None: | 756 if opts.ref <> None or opts.ref_file <> None: |
755 cl.append('R=%s' % ref_file_name) | 757 cl.append('R=%s' % ref_file_name) |
756 stdouts,rval = pic.runPic(opts.jar,cl) | 758 stdouts,rval = pic.runPic(opts.jar,cl) |
757 if opts.datatype == 'sam': | 759 if opts.datatype == 'sam': |
758 pic.delme.append(tempbam) | 760 pic.delme.append(tempbam) |
759 newsam = opts.output | 761 newsam = opts.output |
760 outformat = 'bam' | 762 outformat = 'bam' |
761 pe = open(pic.metricsOut,'r').readlines() | 763 pe = open(pic.metricsOut,'r').readlines() |
762 pic.cleanSam(insam=sortedfile, newsam=newsam, picardErrors=pe,outformat=outformat) | 764 pic.cleanSam(insam=sortedfile, newsam=newsam, picardErrors=pe,outformat=outformat) |
763 pic.delme.append(sortedfile) # not wanted | 765 pic.delme.append(sortedfile) # not wanted |
764 stf.close() | 766 stf.close() |
765 pic.cleanup() | 767 pic.cleanup() |
768 | |
769 ####liubo added CleanSam tool#### | |
770 elif pic.picname == 'CleanSam': | |
771 # input | |
772 cl.append('INPUT=%s' % opts.input) | |
773 # output | |
774 cl.append('OUTPUT=%s' % tempout) | |
775 stdouts,rval = pic.runPic(opts.jar, cl) | |
776 haveTempout = True | |
777 | |
778 elif pic.picname == 'SortSam': | |
779 cl.append('I=%s' % opts.input) | |
780 cl.append('O=%s' % tempout) | |
781 cl.append('SORT_ORDER=%s' % opts.sortorder) | |
782 stdouts,rval = pic.runPic(opts.jar,cl) | |
783 haveTempout = True | |
784 | |
785 elif pic.picname == 'BuildBamIndex': | |
786 cl.append('I=%s' % opts.input) | |
787 cl.append('O=%s' % tempout) | |
788 stdouts,rval = pic.runPic(opts.jar,cl) | |
789 haveTempout = True | |
790 | |
791 elif pic.picname == 'SamFormatConverter': | |
792 cl.append('INPUT=%s' % opts.input) | |
793 cl.append('OUTPUT=%s' % tempout) | |
794 pic.runPic(opts.jar, cl) | |
795 haveTempout = True | |
766 elif pic.picname == "DownsampleSam": | 796 elif pic.picname == "DownsampleSam": |
767 cl.append('I=%s' % opts.input) | 797 cl.append('I=%s' % opts.input) |
768 mystring = opts.output | 798 mystring = opts.output |
769 mystringsam = mystring + ".sam" | 799 mystringsam = mystring + ".sam" |
770 cl.append('O=%s' % mystringsam) | 800 cl.append('O=%s' % mystringsam) |
773 if float(opts.seed) > 0: | 803 if float(opts.seed) > 0: |
774 cl.append('RANDOM_SEED=%s' % opts.seed) | 804 cl.append('RANDOM_SEED=%s' % opts.seed) |
775 stdouts,rval = pic.runPic(opts.jar, cl) | 805 stdouts,rval = pic.runPic(opts.jar, cl) |
776 myoutput = mystringsam.replace(".sam", "") | 806 myoutput = mystringsam.replace(".sam", "") |
777 os.rename(mystringsam,myoutput) | 807 os.rename(mystringsam,myoutput) |
778 elif pic.picname == 'SamFormatConverter': | 808 |
779 cl.append('INPUT=%s' % opts.input) | 809 |
780 cl.append('OUTPUT=%s' % tempout) | |
781 pic.runPic(opts.jar, cl) | |
782 haveTempout = True | |
783 | |
784 else: | 810 else: |
785 print >> sys.stderr,'picard.py got an unknown tool name - %s' % pic.picname | 811 print >> sys.stderr,'picard.py got an unknown tool name - %s' % pic.picname |
786 sys.exit(1) | 812 sys.exit(1) |
787 if haveTempout: | 813 if haveTempout: |
788 # Some Picard tools produced a potentially intermediate bam file. | 814 # Some Picard tools produced a potentially intermediate bam file. |
789 # Either just move to final location or create sam | 815 # Either just move to final location or create sam |
790 if os.path.exists(tempout): | 816 if os.path.exists(tempout): |
791 shutil.move(tempout, os.path.abspath(opts.output)) | 817 shutil.move(tempout, os.path.abspath(opts.output)) |
792 if opts.htmlout <> None or doFix: # return a pretty html page | 818 if opts.htmlout <> None or doFix: # return a pretty html page |
793 pic.fixPicardOutputs(transpose=doTranspose,maxloglines=maxloglines) | 819 pic.fixPicardOutputs(transpose=doTranspose,maxloglines=maxloglines) |
794 if rval <> 0: | 820 if rval <> 0: |
795 print >> sys.stderr, '## exit code=%d; stdout=%s' % (rval,stdouts) | 821 print >> sys.stderr, '## exit code=%d; stdout=%s' % (rval,stdouts) |
796 # signal failure | 822 # signal failure |
797 if __name__=="__main__": __main__() | 823 if __name__=="__main__": __main__() |
798 |