annotate picard_wrapper.py @ 23:0688aff54e92 draft

Uploaded
author devteam
date Wed, 12 Feb 2014 18:02:38 -0500
parents b0585923decc
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
22
b0585923decc Uploaded
devteam
parents:
diff changeset
1 #!/usr/bin/env python
b0585923decc Uploaded
devteam
parents:
diff changeset
2 """
b0585923decc Uploaded
devteam
parents:
diff changeset
3 Originally written by Kelly Vincent
b0585923decc Uploaded
devteam
parents:
diff changeset
4 pretty output and additional picard wrappers by Ross Lazarus for rgenetics
b0585923decc Uploaded
devteam
parents:
diff changeset
5 Runs all available wrapped Picard tools.
b0585923decc Uploaded
devteam
parents:
diff changeset
6 usage: picard_wrapper.py [options]
b0585923decc Uploaded
devteam
parents:
diff changeset
7 code Ross wrote licensed under the LGPL
b0585923decc Uploaded
devteam
parents:
diff changeset
8 see http://www.gnu.org/copyleft/lesser.html
b0585923decc Uploaded
devteam
parents:
diff changeset
9 """
b0585923decc Uploaded
devteam
parents:
diff changeset
10
b0585923decc Uploaded
devteam
parents:
diff changeset
11 import optparse, os, sys, subprocess, tempfile, shutil, time, logging
b0585923decc Uploaded
devteam
parents:
diff changeset
12
b0585923decc Uploaded
devteam
parents:
diff changeset
13 galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
b0585923decc Uploaded
devteam
parents:
diff changeset
14 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
b0585923decc Uploaded
devteam
parents:
diff changeset
15 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
b0585923decc Uploaded
devteam
parents:
diff changeset
16 <head>
b0585923decc Uploaded
devteam
parents:
diff changeset
17 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
b0585923decc Uploaded
devteam
parents:
diff changeset
18 <meta name="generator" content="Bluefish 2.2.3" />
b0585923decc Uploaded
devteam
parents:
diff changeset
19 <title></title>
b0585923decc Uploaded
devteam
parents:
diff changeset
20 <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
b0585923decc Uploaded
devteam
parents:
diff changeset
21 </head>
b0585923decc Uploaded
devteam
parents:
diff changeset
22 <body>
b0585923decc Uploaded
devteam
parents:
diff changeset
23 <div class="document">
b0585923decc Uploaded
devteam
parents:
diff changeset
24 """
b0585923decc Uploaded
devteam
parents:
diff changeset
25 galhtmlattr = """Galaxy tool %s run at %s</b><br/>"""
b0585923decc Uploaded
devteam
parents:
diff changeset
26 galhtmlpostfix = """</div></body></html>\n"""
b0585923decc Uploaded
devteam
parents:
diff changeset
27
b0585923decc Uploaded
devteam
parents:
diff changeset
28
b0585923decc Uploaded
devteam
parents:
diff changeset
29 def stop_err( msg ):
b0585923decc Uploaded
devteam
parents:
diff changeset
30 sys.stderr.write( '%s\n' % msg )
b0585923decc Uploaded
devteam
parents:
diff changeset
31 sys.exit()
b0585923decc Uploaded
devteam
parents:
diff changeset
32
b0585923decc Uploaded
devteam
parents:
diff changeset
33
b0585923decc Uploaded
devteam
parents:
diff changeset
34 def timenow():
b0585923decc Uploaded
devteam
parents:
diff changeset
35 """return current time as a string
b0585923decc Uploaded
devteam
parents:
diff changeset
36 """
b0585923decc Uploaded
devteam
parents:
diff changeset
37 return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
b0585923decc Uploaded
devteam
parents:
diff changeset
38
b0585923decc Uploaded
devteam
parents:
diff changeset
39
b0585923decc Uploaded
devteam
parents:
diff changeset
40 class PicardBase():
b0585923decc Uploaded
devteam
parents:
diff changeset
41 """
b0585923decc Uploaded
devteam
parents:
diff changeset
42 simple base class with some utilities for Picard
b0585923decc Uploaded
devteam
parents:
diff changeset
43 adapted and merged with Kelly Vincent's code april 2011 Ross
b0585923decc Uploaded
devteam
parents:
diff changeset
44 lots of changes...
b0585923decc Uploaded
devteam
parents:
diff changeset
45 """
b0585923decc Uploaded
devteam
parents:
diff changeset
46
b0585923decc Uploaded
devteam
parents:
diff changeset
47 def __init__(self, opts=None,arg0=None):
b0585923decc Uploaded
devteam
parents:
diff changeset
48 """ common stuff needed at init for a picard tool
b0585923decc Uploaded
devteam
parents:
diff changeset
49 """
b0585923decc Uploaded
devteam
parents:
diff changeset
50 assert opts <> None, 'PicardBase needs opts at init'
b0585923decc Uploaded
devteam
parents:
diff changeset
51 self.opts = opts
b0585923decc Uploaded
devteam
parents:
diff changeset
52 if self.opts.outdir == None:
b0585923decc Uploaded
devteam
parents:
diff changeset
53 self.opts.outdir = os.getcwd() # fixmate has no html file eg so use temp dir
b0585923decc Uploaded
devteam
parents:
diff changeset
54 assert self.opts.outdir <> None,'## PicardBase needs a temp directory if no output directory passed in'
b0585923decc Uploaded
devteam
parents:
diff changeset
55 self.picname = self.baseName(opts.jar)
b0585923decc Uploaded
devteam
parents:
diff changeset
56 if self.picname.startswith('picard'):
b0585923decc Uploaded
devteam
parents:
diff changeset
57 self.picname = opts.picard_cmd # special case for some tools like replaceheader?
b0585923decc Uploaded
devteam
parents:
diff changeset
58 self.progname = self.baseName(arg0)
b0585923decc Uploaded
devteam
parents:
diff changeset
59 self.version = '0.002'
b0585923decc Uploaded
devteam
parents:
diff changeset
60 self.delme = [] # list of files to destroy
b0585923decc Uploaded
devteam
parents:
diff changeset
61 self.title = opts.title
b0585923decc Uploaded
devteam
parents:
diff changeset
62 self.inputfile = opts.input
b0585923decc Uploaded
devteam
parents:
diff changeset
63 try:
b0585923decc Uploaded
devteam
parents:
diff changeset
64 os.makedirs(opts.outdir)
b0585923decc Uploaded
devteam
parents:
diff changeset
65 except:
b0585923decc Uploaded
devteam
parents:
diff changeset
66 pass
b0585923decc Uploaded
devteam
parents:
diff changeset
67 try:
b0585923decc Uploaded
devteam
parents:
diff changeset
68 os.makedirs(opts.tmpdir)
b0585923decc Uploaded
devteam
parents:
diff changeset
69 except:
b0585923decc Uploaded
devteam
parents:
diff changeset
70 pass
b0585923decc Uploaded
devteam
parents:
diff changeset
71 self.log_filename = os.path.join(self.opts.outdir,'%s.log' % self.picname)
b0585923decc Uploaded
devteam
parents:
diff changeset
72 self.metricsOut = os.path.join(opts.outdir,'%s.metrics.txt' % self.picname)
b0585923decc Uploaded
devteam
parents:
diff changeset
73 self.setLogging(logfname=self.log_filename)
b0585923decc Uploaded
devteam
parents:
diff changeset
74
b0585923decc Uploaded
devteam
parents:
diff changeset
75 def baseName(self,name=None):
b0585923decc Uploaded
devteam
parents:
diff changeset
76 return os.path.splitext(os.path.basename(name))[0]
b0585923decc Uploaded
devteam
parents:
diff changeset
77
b0585923decc Uploaded
devteam
parents:
diff changeset
78 def setLogging(self,logfname="picard_wrapper.log"):
b0585923decc Uploaded
devteam
parents:
diff changeset
79 """setup a logger
b0585923decc Uploaded
devteam
parents:
diff changeset
80 """
b0585923decc Uploaded
devteam
parents:
diff changeset
81 logging.basicConfig(level=logging.INFO,
b0585923decc Uploaded
devteam
parents:
diff changeset
82 filename=logfname,
b0585923decc Uploaded
devteam
parents:
diff changeset
83 filemode='a')
b0585923decc Uploaded
devteam
parents:
diff changeset
84
b0585923decc Uploaded
devteam
parents:
diff changeset
85
b0585923decc Uploaded
devteam
parents:
diff changeset
86 def readLarge(self,fname=None):
b0585923decc Uploaded
devteam
parents:
diff changeset
87 """ read a potentially huge file.
b0585923decc Uploaded
devteam
parents:
diff changeset
88 """
b0585923decc Uploaded
devteam
parents:
diff changeset
89 try:
b0585923decc Uploaded
devteam
parents:
diff changeset
90 # get stderr, allowing for case where it's very large
b0585923decc Uploaded
devteam
parents:
diff changeset
91 tmp = open( fname, 'rb' )
b0585923decc Uploaded
devteam
parents:
diff changeset
92 s = ''
b0585923decc Uploaded
devteam
parents:
diff changeset
93 buffsize = 1048576
b0585923decc Uploaded
devteam
parents:
diff changeset
94 try:
b0585923decc Uploaded
devteam
parents:
diff changeset
95 while True:
b0585923decc Uploaded
devteam
parents:
diff changeset
96 more = tmp.read( buffsize )
b0585923decc Uploaded
devteam
parents:
diff changeset
97 if len(more) > 0:
b0585923decc Uploaded
devteam
parents:
diff changeset
98 s += more
b0585923decc Uploaded
devteam
parents:
diff changeset
99 else:
b0585923decc Uploaded
devteam
parents:
diff changeset
100 break
b0585923decc Uploaded
devteam
parents:
diff changeset
101 except OverflowError:
b0585923decc Uploaded
devteam
parents:
diff changeset
102 pass
b0585923decc Uploaded
devteam
parents:
diff changeset
103 tmp.close()
b0585923decc Uploaded
devteam
parents:
diff changeset
104 except Exception, e:
b0585923decc Uploaded
devteam
parents:
diff changeset
105 stop_err( 'Read Large Exception : %s' % str( e ) )
b0585923decc Uploaded
devteam
parents:
diff changeset
106 return s
b0585923decc Uploaded
devteam
parents:
diff changeset
107
b0585923decc Uploaded
devteam
parents:
diff changeset
108 def runCL(self,cl=None,output_dir=None):
b0585923decc Uploaded
devteam
parents:
diff changeset
109 """ construct and run a command line
b0585923decc Uploaded
devteam
parents:
diff changeset
110 we have galaxy's temp path as opt.temp_dir so don't really need isolation
b0585923decc Uploaded
devteam
parents:
diff changeset
111 sometimes stdout is needed as the output - ugly hacks to deal with potentially vast artifacts
b0585923decc Uploaded
devteam
parents:
diff changeset
112 """
b0585923decc Uploaded
devteam
parents:
diff changeset
113 assert cl <> None, 'PicardBase runCL needs a command line as cl'
b0585923decc Uploaded
devteam
parents:
diff changeset
114 if output_dir == None:
b0585923decc Uploaded
devteam
parents:
diff changeset
115 output_dir = self.opts.outdir
b0585923decc Uploaded
devteam
parents:
diff changeset
116 if type(cl) == type([]):
b0585923decc Uploaded
devteam
parents:
diff changeset
117 cl = ' '.join(cl)
b0585923decc Uploaded
devteam
parents:
diff changeset
118 fd,templog = tempfile.mkstemp(dir=output_dir,suffix='rgtempRun.txt')
b0585923decc Uploaded
devteam
parents:
diff changeset
119 tlf = open(templog,'wb')
b0585923decc Uploaded
devteam
parents:
diff changeset
120 fd,temperr = tempfile.mkstemp(dir=output_dir,suffix='rgtempErr.txt')
b0585923decc Uploaded
devteam
parents:
diff changeset
121 tef = open(temperr,'wb')
b0585923decc Uploaded
devteam
parents:
diff changeset
122 process = subprocess.Popen(cl, shell=True, stderr=tef, stdout=tlf, cwd=output_dir)
b0585923decc Uploaded
devteam
parents:
diff changeset
123 rval = process.wait()
b0585923decc Uploaded
devteam
parents:
diff changeset
124 tlf.close()
b0585923decc Uploaded
devteam
parents:
diff changeset
125 tef.close()
b0585923decc Uploaded
devteam
parents:
diff changeset
126 stderrs = self.readLarge(temperr)
b0585923decc Uploaded
devteam
parents:
diff changeset
127 stdouts = self.readLarge(templog)
b0585923decc Uploaded
devteam
parents:
diff changeset
128 if rval > 0:
b0585923decc Uploaded
devteam
parents:
diff changeset
129 s = '## executing %s returned status %d and stderr: \n%s\n' % (cl,rval,stderrs)
b0585923decc Uploaded
devteam
parents:
diff changeset
130 stdouts = '%s\n%s' % (stdouts,stderrs)
b0585923decc Uploaded
devteam
parents:
diff changeset
131 else:
b0585923decc Uploaded
devteam
parents:
diff changeset
132 s = '## executing %s returned status %d and nothing on stderr\n' % (cl,rval)
b0585923decc Uploaded
devteam
parents:
diff changeset
133 logging.info(s)
b0585923decc Uploaded
devteam
parents:
diff changeset
134 os.unlink(templog) # always
b0585923decc Uploaded
devteam
parents:
diff changeset
135 os.unlink(temperr) # always
b0585923decc Uploaded
devteam
parents:
diff changeset
136 return s, stdouts, rval # sometimes s is an output
b0585923decc Uploaded
devteam
parents:
diff changeset
137
b0585923decc Uploaded
devteam
parents:
diff changeset
138 def runPic(self, jar, cl):
b0585923decc Uploaded
devteam
parents:
diff changeset
139 """
b0585923decc Uploaded
devteam
parents:
diff changeset
140 cl should be everything after the jar file name in the command
b0585923decc Uploaded
devteam
parents:
diff changeset
141 """
b0585923decc Uploaded
devteam
parents:
diff changeset
142 runme = ['java -Xmx%s' % self.opts.maxjheap]
b0585923decc Uploaded
devteam
parents:
diff changeset
143 runme.append(" -Djava.io.tmpdir='%s' " % self.opts.tmpdir)
b0585923decc Uploaded
devteam
parents:
diff changeset
144 runme.append('-jar %s' % jar)
b0585923decc Uploaded
devteam
parents:
diff changeset
145 runme += cl
b0585923decc Uploaded
devteam
parents:
diff changeset
146
b0585923decc Uploaded
devteam
parents:
diff changeset
147 print runme,self.opts.outdir
b0585923decc Uploaded
devteam
parents:
diff changeset
148
b0585923decc Uploaded
devteam
parents:
diff changeset
149 s,stdouts,rval = self.runCL(cl=runme, output_dir=self.opts.outdir)
b0585923decc Uploaded
devteam
parents:
diff changeset
150 return stdouts,rval
b0585923decc Uploaded
devteam
parents:
diff changeset
151
b0585923decc Uploaded
devteam
parents:
diff changeset
152 def samToBam(self,infile=None,outdir=None):
b0585923decc Uploaded
devteam
parents:
diff changeset
153 """
b0585923decc Uploaded
devteam
parents:
diff changeset
154 use samtools view to convert sam to bam
b0585923decc Uploaded
devteam
parents:
diff changeset
155 """
b0585923decc Uploaded
devteam
parents:
diff changeset
156 fd,tempbam = tempfile.mkstemp(dir=outdir,suffix='rgutilsTemp.bam')
b0585923decc Uploaded
devteam
parents:
diff changeset
157 cl = ['samtools view -h -b -S -o ',tempbam,infile]
b0585923decc Uploaded
devteam
parents:
diff changeset
158 tlog,stdouts,rval = self.runCL(cl,outdir)
b0585923decc Uploaded
devteam
parents:
diff changeset
159 return tlog,tempbam,rval
b0585923decc Uploaded
devteam
parents:
diff changeset
160
b0585923decc Uploaded
devteam
parents:
diff changeset
161 def sortSam(self, infile=None,outfile=None,outdir=None):
b0585923decc Uploaded
devteam
parents:
diff changeset
162 """
b0585923decc Uploaded
devteam
parents:
diff changeset
163 """
b0585923decc Uploaded
devteam
parents:
diff changeset
164 print '## sortSam got infile=%s,outfile=%s,outdir=%s' % (infile,outfile,outdir)
b0585923decc Uploaded
devteam
parents:
diff changeset
165 cl = ['samtools sort',infile,outfile]
b0585923decc Uploaded
devteam
parents:
diff changeset
166 tlog,stdouts,rval = self.runCL(cl,outdir)
b0585923decc Uploaded
devteam
parents:
diff changeset
167 return tlog
b0585923decc Uploaded
devteam
parents:
diff changeset
168
b0585923decc Uploaded
devteam
parents:
diff changeset
169 def cleanup(self):
b0585923decc Uploaded
devteam
parents:
diff changeset
170 for fname in self.delme:
b0585923decc Uploaded
devteam
parents:
diff changeset
171 try:
b0585923decc Uploaded
devteam
parents:
diff changeset
172 os.unlink(fname)
b0585923decc Uploaded
devteam
parents:
diff changeset
173 except:
b0585923decc Uploaded
devteam
parents:
diff changeset
174 pass
b0585923decc Uploaded
devteam
parents:
diff changeset
175
b0585923decc Uploaded
devteam
parents:
diff changeset
176 def prettyPicout(self,transpose,maxrows):
b0585923decc Uploaded
devteam
parents:
diff changeset
177 """organize picard outpouts into a report html page
b0585923decc Uploaded
devteam
parents:
diff changeset
178 """
b0585923decc Uploaded
devteam
parents:
diff changeset
179 res = []
b0585923decc Uploaded
devteam
parents:
diff changeset
180 try:
b0585923decc Uploaded
devteam
parents:
diff changeset
181 r = open(self.metricsOut,'r').readlines()
b0585923decc Uploaded
devteam
parents:
diff changeset
182 except:
b0585923decc Uploaded
devteam
parents:
diff changeset
183 r = []
b0585923decc Uploaded
devteam
parents:
diff changeset
184 if len(r) > 0:
b0585923decc Uploaded
devteam
parents:
diff changeset
185 res.append('<b>Picard on line resources</b><ul>\n')
b0585923decc Uploaded
devteam
parents:
diff changeset
186 res.append('<li><a href="http://picard.sourceforge.net/index.shtml">Click here for Picard Documentation</a></li>\n')
b0585923decc Uploaded
devteam
parents:
diff changeset
187 res.append('<li><a href="http://picard.sourceforge.net/picard-metric-definitions.shtml">Click here for Picard Metrics definitions</a></li></ul><hr/>\n')
b0585923decc Uploaded
devteam
parents:
diff changeset
188 if transpose:
b0585923decc Uploaded
devteam
parents:
diff changeset
189 res.append('<b>Picard output (transposed to make it easier to see)</b><hr/>\n')
b0585923decc Uploaded
devteam
parents:
diff changeset
190 else:
b0585923decc Uploaded
devteam
parents:
diff changeset
191 res.append('<b>Picard output</b><hr/>\n')
b0585923decc Uploaded
devteam
parents:
diff changeset
192 res.append('<table cellpadding="3" >\n')
b0585923decc Uploaded
devteam
parents:
diff changeset
193 dat = []
b0585923decc Uploaded
devteam
parents:
diff changeset
194 heads = []
b0585923decc Uploaded
devteam
parents:
diff changeset
195 lastr = len(r) - 1
b0585923decc Uploaded
devteam
parents:
diff changeset
196 # special case for estimate library complexity hist
b0585923decc Uploaded
devteam
parents:
diff changeset
197 thist = False
b0585923decc Uploaded
devteam
parents:
diff changeset
198 for i,row in enumerate(r):
b0585923decc Uploaded
devteam
parents:
diff changeset
199 if row.strip() > '':
b0585923decc Uploaded
devteam
parents:
diff changeset
200 srow = row.split('\t')
b0585923decc Uploaded
devteam
parents:
diff changeset
201 if row.startswith('#'):
b0585923decc Uploaded
devteam
parents:
diff changeset
202 heads.append(row.strip()) # want strings
b0585923decc Uploaded
devteam
parents:
diff changeset
203 else:
b0585923decc Uploaded
devteam
parents:
diff changeset
204 dat.append(srow) # want lists
b0585923decc Uploaded
devteam
parents:
diff changeset
205 if row.startswith('## HISTOGRAM'):
b0585923decc Uploaded
devteam
parents:
diff changeset
206 thist = True
b0585923decc Uploaded
devteam
parents:
diff changeset
207 if len(heads) > 0:
b0585923decc Uploaded
devteam
parents:
diff changeset
208 hres = ['<tr class="d%d"><td colspan="2">%s</td></tr>' % (i % 2,x) for i,x in enumerate(heads)]
b0585923decc Uploaded
devteam
parents:
diff changeset
209 res += hres
b0585923decc Uploaded
devteam
parents:
diff changeset
210 heads = []
b0585923decc Uploaded
devteam
parents:
diff changeset
211 if len(dat) > 0:
b0585923decc Uploaded
devteam
parents:
diff changeset
212 if transpose and not thist:
b0585923decc Uploaded
devteam
parents:
diff changeset
213 tdat = map(None,*dat) # transpose an arbitrary list of lists
b0585923decc Uploaded
devteam
parents:
diff changeset
214 tdat = ['<tr class="d%d"><td>%s</td><td>%s&nbsp;</td></tr>\n' % ((i+len(heads)) % 2,x[0],x[1]) for i,x in enumerate(tdat)]
b0585923decc Uploaded
devteam
parents:
diff changeset
215 else:
b0585923decc Uploaded
devteam
parents:
diff changeset
216 tdat = ['\t'.join(x).strip() for x in dat] # back to strings :(
b0585923decc Uploaded
devteam
parents:
diff changeset
217 tdat = ['<tr class="d%d"><td colspan="2">%s</td></tr>\n' % ((i+len(heads)) % 2,x) for i,x in enumerate(tdat)]
b0585923decc Uploaded
devteam
parents:
diff changeset
218 res += tdat
b0585923decc Uploaded
devteam
parents:
diff changeset
219 dat = []
b0585923decc Uploaded
devteam
parents:
diff changeset
220 res.append('</table>\n')
b0585923decc Uploaded
devteam
parents:
diff changeset
221 return res
b0585923decc Uploaded
devteam
parents:
diff changeset
222
b0585923decc Uploaded
devteam
parents:
diff changeset
223 def fixPicardOutputs(self,transpose,maxloglines):
b0585923decc Uploaded
devteam
parents:
diff changeset
224 """
b0585923decc Uploaded
devteam
parents:
diff changeset
225 picard produces long hard to read tab header files
b0585923decc Uploaded
devteam
parents:
diff changeset
226 make them available but present them transposed for readability
b0585923decc Uploaded
devteam
parents:
diff changeset
227 """
b0585923decc Uploaded
devteam
parents:
diff changeset
228 logging.shutdown()
b0585923decc Uploaded
devteam
parents:
diff changeset
229 self.cleanup() # remove temp files stored in delme
b0585923decc Uploaded
devteam
parents:
diff changeset
230 rstyle="""<style type="text/css">
b0585923decc Uploaded
devteam
parents:
diff changeset
231 tr.d0 td {background-color: oldlace; color: black;}
b0585923decc Uploaded
devteam
parents:
diff changeset
232 tr.d1 td {background-color: aliceblue; color: black;}
b0585923decc Uploaded
devteam
parents:
diff changeset
233 </style>"""
b0585923decc Uploaded
devteam
parents:
diff changeset
234 res = [rstyle,]
b0585923decc Uploaded
devteam
parents:
diff changeset
235 res.append(galhtmlprefix % self.progname)
b0585923decc Uploaded
devteam
parents:
diff changeset
236 res.append(galhtmlattr % (self.picname,timenow()))
b0585923decc Uploaded
devteam
parents:
diff changeset
237 flist = [x for x in os.listdir(self.opts.outdir) if not x.startswith('.')]
b0585923decc Uploaded
devteam
parents:
diff changeset
238 pdflist = [x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf']
b0585923decc Uploaded
devteam
parents:
diff changeset
239 if len(pdflist) > 0: # assumes all pdfs come with thumbnail .jpgs
b0585923decc Uploaded
devteam
parents:
diff changeset
240 for p in pdflist:
b0585923decc Uploaded
devteam
parents:
diff changeset
241 pbase = os.path.splitext(p)[0] # removes .pdf
b0585923decc Uploaded
devteam
parents:
diff changeset
242 imghref = '%s.jpg' % pbase
b0585923decc Uploaded
devteam
parents:
diff changeset
243 mimghref = '%s-0.jpg' % pbase # multiple pages pdf -> multiple thumbnails without asking!
b0585923decc Uploaded
devteam
parents:
diff changeset
244 if mimghref in flist:
b0585923decc Uploaded
devteam
parents:
diff changeset
245 imghref=mimghref # only one for thumbnail...it's a multi page pdf
b0585923decc Uploaded
devteam
parents:
diff changeset
246 res.append('<table cellpadding="10"><tr><td>\n')
b0585923decc Uploaded
devteam
parents:
diff changeset
247 res.append('<a href="%s"><img src="%s" title="Click image preview for a print quality PDF version" hspace="10" align="middle"></a>\n' % (p,imghref))
b0585923decc Uploaded
devteam
parents:
diff changeset
248 res.append('</tr></td></table>\n')
b0585923decc Uploaded
devteam
parents:
diff changeset
249 if len(flist) > 0:
b0585923decc Uploaded
devteam
parents:
diff changeset
250 res.append('<b>The following output files were created (click the filename to view/download a copy):</b><hr/>')
b0585923decc Uploaded
devteam
parents:
diff changeset
251 res.append('<table>\n')
b0585923decc Uploaded
devteam
parents:
diff changeset
252 for i,f in enumerate(flist):
b0585923decc Uploaded
devteam
parents:
diff changeset
253 fn = os.path.split(f)[-1]
b0585923decc Uploaded
devteam
parents:
diff changeset
254 res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,fn))
b0585923decc Uploaded
devteam
parents:
diff changeset
255 res.append('</table><p/>\n')
b0585923decc Uploaded
devteam
parents:
diff changeset
256 pres = self.prettyPicout(transpose,maxloglines)
b0585923decc Uploaded
devteam
parents:
diff changeset
257 if len(pres) > 0:
b0585923decc Uploaded
devteam
parents:
diff changeset
258 res += pres
b0585923decc Uploaded
devteam
parents:
diff changeset
259 l = open(self.log_filename,'r').readlines()
b0585923decc Uploaded
devteam
parents:
diff changeset
260 llen = len(l)
b0585923decc Uploaded
devteam
parents:
diff changeset
261 if llen > 0:
b0585923decc Uploaded
devteam
parents:
diff changeset
262 res.append('<b>Picard Tool Run Log</b><hr/>\n')
b0585923decc Uploaded
devteam
parents:
diff changeset
263 rlog = ['<pre>',]
b0585923decc Uploaded
devteam
parents:
diff changeset
264 if llen > maxloglines:
b0585923decc Uploaded
devteam
parents:
diff changeset
265 n = min(50,int(maxloglines/2))
b0585923decc Uploaded
devteam
parents:
diff changeset
266 rlog += l[:n]
b0585923decc Uploaded
devteam
parents:
diff changeset
267 rlog.append('------------ ## %d rows deleted ## --------------\n' % (llen-maxloglines))
b0585923decc Uploaded
devteam
parents:
diff changeset
268 rlog += l[-n:]
b0585923decc Uploaded
devteam
parents:
diff changeset
269 else:
b0585923decc Uploaded
devteam
parents:
diff changeset
270 rlog += l
b0585923decc Uploaded
devteam
parents:
diff changeset
271 rlog.append('</pre>')
b0585923decc Uploaded
devteam
parents:
diff changeset
272 if llen > maxloglines:
b0585923decc Uploaded
devteam
parents:
diff changeset
273 rlog.append('\n<b>## WARNING - %d log lines truncated - <a href="%s">%s</a> contains entire output</b>' % (llen - maxloglines,self.log_filename,self.log_filename))
b0585923decc Uploaded
devteam
parents:
diff changeset
274 res += rlog
b0585923decc Uploaded
devteam
parents:
diff changeset
275 else:
b0585923decc Uploaded
devteam
parents:
diff changeset
276 res.append("### Odd, Picard left no log file %s - must have really barfed badly?\n" % self.log_filename)
b0585923decc Uploaded
devteam
parents:
diff changeset
277 res.append('<hr/>The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> \n')
b0585923decc Uploaded
devteam
parents:
diff changeset
278 res.append( 'generated all outputs reported here running as a <a href="http://getgalaxy.org">Galaxy</a> tool')
b0585923decc Uploaded
devteam
parents:
diff changeset
279 res.append(galhtmlpostfix)
b0585923decc Uploaded
devteam
parents:
diff changeset
280 outf = open(self.opts.htmlout,'w')
b0585923decc Uploaded
devteam
parents:
diff changeset
281 outf.write(''.join(res))
b0585923decc Uploaded
devteam
parents:
diff changeset
282 outf.write('\n')
b0585923decc Uploaded
devteam
parents:
diff changeset
283 outf.close()
b0585923decc Uploaded
devteam
parents:
diff changeset
284
b0585923decc Uploaded
devteam
parents:
diff changeset
285 def makePicInterval(self,inbed=None,outf=None):
b0585923decc Uploaded
devteam
parents:
diff changeset
286 """
b0585923decc Uploaded
devteam
parents:
diff changeset
287 picard wants bait and target files to have the same header length as the incoming bam/sam
b0585923decc Uploaded
devteam
parents:
diff changeset
288 a meaningful (ie accurate) representation will fail because of this - so this hack
b0585923decc Uploaded
devteam
parents:
diff changeset
289 it would be far better to be able to supply the original bed untouched
b0585923decc Uploaded
devteam
parents:
diff changeset
290 Additional checking added Ross Lazarus Dec 2011 to deal with two 'bug' reports on the list
b0585923decc Uploaded
devteam
parents:
diff changeset
291 """
b0585923decc Uploaded
devteam
parents:
diff changeset
292 assert inbed <> None
b0585923decc Uploaded
devteam
parents:
diff changeset
293 bed = open(inbed,'r').readlines()
b0585923decc Uploaded
devteam
parents:
diff changeset
294 sbed = [x.split('\t') for x in bed] # lengths MUST be 5
b0585923decc Uploaded
devteam
parents:
diff changeset
295 lens = [len(x) for x in sbed]
b0585923decc Uploaded
devteam
parents:
diff changeset
296 strands = [x[3] for x in sbed if not x[3] in ['+','-']]
b0585923decc Uploaded
devteam
parents:
diff changeset
297 maxl = max(lens)
b0585923decc Uploaded
devteam
parents:
diff changeset
298 minl = min(lens)
b0585923decc Uploaded
devteam
parents:
diff changeset
299 e = []
b0585923decc Uploaded
devteam
parents:
diff changeset
300 if maxl <> minl:
b0585923decc Uploaded
devteam
parents:
diff changeset
301 e.append("## Input error: Inconsistent field count in %s - please read the documentation on bait/target format requirements, fix and try again" % inbed)
b0585923decc Uploaded
devteam
parents:
diff changeset
302 if maxl <> 5:
b0585923decc Uploaded
devteam
parents:
diff changeset
303 e.append("## Input error: %d fields found in %s, 5 required - please read the warning and documentation on bait/target format requirements, fix and try again" % (maxl,inbed))
b0585923decc Uploaded
devteam
parents:
diff changeset
304 if len(strands) > 0:
b0585923decc Uploaded
devteam
parents:
diff changeset
305 e.append("## Input error: Fourth column in %s is not the required strand (+ or -) - please read the warning and documentation on bait/target format requirements, fix and try again" % (inbed))
b0585923decc Uploaded
devteam
parents:
diff changeset
306 if len(e) > 0: # write to stderr and quit
b0585923decc Uploaded
devteam
parents:
diff changeset
307 print >> sys.stderr, '\n'.join(e)
b0585923decc Uploaded
devteam
parents:
diff changeset
308 sys.exit(1)
b0585923decc Uploaded
devteam
parents:
diff changeset
309 thead = os.path.join(self.opts.outdir,'tempSamHead.txt')
b0585923decc Uploaded
devteam
parents:
diff changeset
310 if self.opts.datatype == 'sam':
b0585923decc Uploaded
devteam
parents:
diff changeset
311 cl = ['samtools view -H -S',self.opts.input,'>',thead]
b0585923decc Uploaded
devteam
parents:
diff changeset
312 else:
b0585923decc Uploaded
devteam
parents:
diff changeset
313 cl = ['samtools view -H',self.opts.input,'>',thead]
b0585923decc Uploaded
devteam
parents:
diff changeset
314 self.runCL(cl=cl,output_dir=self.opts.outdir)
b0585923decc Uploaded
devteam
parents:
diff changeset
315 head = open(thead,'r').readlines()
b0585923decc Uploaded
devteam
parents:
diff changeset
316 s = '## got %d rows of header\n' % (len(head))
b0585923decc Uploaded
devteam
parents:
diff changeset
317 logging.info(s)
b0585923decc Uploaded
devteam
parents:
diff changeset
318 o = open(outf,'w')
b0585923decc Uploaded
devteam
parents:
diff changeset
319 o.write(''.join(head))
b0585923decc Uploaded
devteam
parents:
diff changeset
320 o.write(''.join(bed))
b0585923decc Uploaded
devteam
parents:
diff changeset
321 o.close()
b0585923decc Uploaded
devteam
parents:
diff changeset
322 return outf
b0585923decc Uploaded
devteam
parents:
diff changeset
323
b0585923decc Uploaded
devteam
parents:
diff changeset
324 def cleanSam(self, insam=None, newsam=None, picardErrors=[],outformat=None):
b0585923decc Uploaded
devteam
parents:
diff changeset
325 """
b0585923decc Uploaded
devteam
parents:
diff changeset
326 interesting problem - if paired, must remove mate pair of errors too or we have a new set of errors after cleaning - missing mate pairs!
b0585923decc Uploaded
devteam
parents:
diff changeset
327 Do the work of removing all the error sequences
b0585923decc Uploaded
devteam
parents:
diff changeset
328 pysam is cool
b0585923decc Uploaded
devteam
parents:
diff changeset
329 infile = pysam.Samfile( "-", "r" )
b0585923decc Uploaded
devteam
parents:
diff changeset
330 outfile = pysam.Samfile( "-", "w", template = infile )
b0585923decc Uploaded
devteam
parents:
diff changeset
331 for s in infile: outfile.write(s)
b0585923decc Uploaded
devteam
parents:
diff changeset
332
b0585923decc Uploaded
devteam
parents:
diff changeset
333 errors from ValidateSameFile.jar look like
b0585923decc Uploaded
devteam
parents:
diff changeset
334 WARNING: Record 32, Read name SRR006041.1202260, NM tag (nucleotide differences) is missing
b0585923decc Uploaded
devteam
parents:
diff changeset
335 ERROR: Record 33, Read name SRR006041.1042721, Empty sequence dictionary.
b0585923decc Uploaded
devteam
parents:
diff changeset
336 ERROR: Record 33, Read name SRR006041.1042721, RG ID on SAMRecord not found in header: SRR006041
b0585923decc Uploaded
devteam
parents:
diff changeset
337
b0585923decc Uploaded
devteam
parents:
diff changeset
338 """
b0585923decc Uploaded
devteam
parents:
diff changeset
339 assert os.path.isfile(insam), 'rgPicardValidate cleansam needs an input sam file - cannot find %s' % insam
b0585923decc Uploaded
devteam
parents:
diff changeset
340 assert newsam <> None, 'rgPicardValidate cleansam needs an output new sam file path'
b0585923decc Uploaded
devteam
parents:
diff changeset
341 removeNames = [x.split(',')[1].replace(' Read name ','') for x in picardErrors if len(x.split(',')) > 2]
b0585923decc Uploaded
devteam
parents:
diff changeset
342 remDict = dict(zip(removeNames,range(len(removeNames))))
b0585923decc Uploaded
devteam
parents:
diff changeset
343 infile = pysam.Samfile(insam,'rb')
b0585923decc Uploaded
devteam
parents:
diff changeset
344 info = 'found %d error sequences in picardErrors, %d unique' % (len(removeNames),len(remDict))
b0585923decc Uploaded
devteam
parents:
diff changeset
345 if len(removeNames) > 0:
b0585923decc Uploaded
devteam
parents:
diff changeset
346 outfile = pysam.Samfile(newsam,'wb',template=infile) # template must be an open file
b0585923decc Uploaded
devteam
parents:
diff changeset
347 i = 0
b0585923decc Uploaded
devteam
parents:
diff changeset
348 j = 0
b0585923decc Uploaded
devteam
parents:
diff changeset
349 for row in infile:
b0585923decc Uploaded
devteam
parents:
diff changeset
350 dropme = remDict.get(row.qname,None) # keep if None
b0585923decc Uploaded
devteam
parents:
diff changeset
351 if not dropme:
b0585923decc Uploaded
devteam
parents:
diff changeset
352 outfile.write(row)
b0585923decc Uploaded
devteam
parents:
diff changeset
353 j += 1
b0585923decc Uploaded
devteam
parents:
diff changeset
354 else: # discard
b0585923decc Uploaded
devteam
parents:
diff changeset
355 i += 1
b0585923decc Uploaded
devteam
parents:
diff changeset
356 info = '%s\n%s' % (info, 'Discarded %d lines writing %d to %s from %s' % (i,j,newsam,insam))
b0585923decc Uploaded
devteam
parents:
diff changeset
357 outfile.close()
b0585923decc Uploaded
devteam
parents:
diff changeset
358 infile.close()
b0585923decc Uploaded
devteam
parents:
diff changeset
359 else: # we really want a nullop or a simple pointer copy
b0585923decc Uploaded
devteam
parents:
diff changeset
360 infile.close()
b0585923decc Uploaded
devteam
parents:
diff changeset
361 if newsam:
b0585923decc Uploaded
devteam
parents:
diff changeset
362 shutil.copy(insam,newsam)
b0585923decc Uploaded
devteam
parents:
diff changeset
363 logging.info(info)
b0585923decc Uploaded
devteam
parents:
diff changeset
364
b0585923decc Uploaded
devteam
parents:
diff changeset
365
b0585923decc Uploaded
devteam
parents:
diff changeset
366
b0585923decc Uploaded
devteam
parents:
diff changeset
367 def __main__():
b0585923decc Uploaded
devteam
parents:
diff changeset
368 doFix = False # tools returning htmlfile don't need this
b0585923decc Uploaded
devteam
parents:
diff changeset
369 doTranspose = True # default
b0585923decc Uploaded
devteam
parents:
diff changeset
370 maxloglines = 100 # default
b0585923decc Uploaded
devteam
parents:
diff changeset
371 #Parse Command Line
b0585923decc Uploaded
devteam
parents:
diff changeset
372 op = optparse.OptionParser()
b0585923decc Uploaded
devteam
parents:
diff changeset
373 # All tools
b0585923decc Uploaded
devteam
parents:
diff changeset
374 op.add_option('-i', '--input', dest='input', help='Input SAM or BAM file' )
b0585923decc Uploaded
devteam
parents:
diff changeset
375 op.add_option('-e', '--inputext', default=None)
b0585923decc Uploaded
devteam
parents:
diff changeset
376 op.add_option('-o', '--output', default=None)
b0585923decc Uploaded
devteam
parents:
diff changeset
377 op.add_option('-n', '--title', default="Pick a Picard Tool")
b0585923decc Uploaded
devteam
parents:
diff changeset
378 op.add_option('-t', '--htmlout', default=None)
b0585923decc Uploaded
devteam
parents:
diff changeset
379 op.add_option('-d', '--outdir', default=None)
b0585923decc Uploaded
devteam
parents:
diff changeset
380 op.add_option('-x', '--maxjheap', default='3000m')
b0585923decc Uploaded
devteam
parents:
diff changeset
381 op.add_option('-b', '--bisulphite', default='false')
b0585923decc Uploaded
devteam
parents:
diff changeset
382 op.add_option('-s', '--sortorder', default='query')
b0585923decc Uploaded
devteam
parents:
diff changeset
383 op.add_option('','--tmpdir', default='/tmp')
b0585923decc Uploaded
devteam
parents:
diff changeset
384 op.add_option('-j','--jar',default='')
b0585923decc Uploaded
devteam
parents:
diff changeset
385 op.add_option('','--picard-cmd',default=None)
b0585923decc Uploaded
devteam
parents:
diff changeset
386 # Many tools
b0585923decc Uploaded
devteam
parents:
diff changeset
387 op.add_option( '', '--output-format', dest='output_format', help='Output format' )
b0585923decc Uploaded
devteam
parents:
diff changeset
388 op.add_option( '', '--bai-file', dest='bai_file', help='The path to the index file for the input bam file' )
b0585923decc Uploaded
devteam
parents:
diff changeset
389 op.add_option( '', '--ref', dest='ref', help='Built-in reference with fasta and dict file', default=None )
b0585923decc Uploaded
devteam
parents:
diff changeset
390 # CreateSequenceDictionary
b0585923decc Uploaded
devteam
parents:
diff changeset
391 op.add_option( '', '--ref-file', dest='ref_file', help='Fasta to use as reference', default=None )
b0585923decc Uploaded
devteam
parents:
diff changeset
392 op.add_option( '', '--species-name', dest='species_name', help='Species name to use in creating dict file from fasta file' )
b0585923decc Uploaded
devteam
parents:
diff changeset
393 op.add_option( '', '--build-name', dest='build_name', help='Name of genome assembly to use in creating dict file from fasta file' )
b0585923decc Uploaded
devteam
parents:
diff changeset
394 op.add_option( '', '--trunc-names', dest='trunc_names', help='Truncate sequence names at first whitespace from fasta file' )
b0585923decc Uploaded
devteam
parents:
diff changeset
395 # MarkDuplicates
b0585923decc Uploaded
devteam
parents:
diff changeset
396 op.add_option( '', '--remdups', default='true', help='Remove duplicates from output file' )
b0585923decc Uploaded
devteam
parents:
diff changeset
397 op.add_option( '', '--optdupdist', default="100", help='Maximum pixels between two identical sequences in order to consider them optical duplicates.' )
b0585923decc Uploaded
devteam
parents:
diff changeset
398 # CollectInsertSizeMetrics
b0585923decc Uploaded
devteam
parents:
diff changeset
399 op.add_option('', '--taillimit', default="0")
b0585923decc Uploaded
devteam
parents:
diff changeset
400 op.add_option('', '--histwidth', default="0")
b0585923decc Uploaded
devteam
parents:
diff changeset
401 op.add_option('', '--minpct', default="0.01")
b0585923decc Uploaded
devteam
parents:
diff changeset
402 op.add_option('', '--malevel', default='')
b0585923decc Uploaded
devteam
parents:
diff changeset
403 op.add_option('', '--deviations', default="0.0")
b0585923decc Uploaded
devteam
parents:
diff changeset
404 # CollectAlignmentSummaryMetrics
b0585923decc Uploaded
devteam
parents:
diff changeset
405 op.add_option('', '--maxinsert', default="20")
b0585923decc Uploaded
devteam
parents:
diff changeset
406 op.add_option('', '--adaptors', default='')
b0585923decc Uploaded
devteam
parents:
diff changeset
407 # FixMateInformation and validate
b0585923decc Uploaded
devteam
parents:
diff changeset
408 # CollectGcBiasMetrics
b0585923decc Uploaded
devteam
parents:
diff changeset
409 op.add_option('', '--windowsize', default='100')
b0585923decc Uploaded
devteam
parents:
diff changeset
410 op.add_option('', '--mingenomefrac', default='0.00001')
b0585923decc Uploaded
devteam
parents:
diff changeset
411 # AddOrReplaceReadGroups
b0585923decc Uploaded
devteam
parents:
diff changeset
412 op.add_option( '', '--rg-opts', dest='rg_opts', help='Specify extra (optional) arguments with full, otherwise preSet' )
b0585923decc Uploaded
devteam
parents:
diff changeset
413 op.add_option( '', '--rg-lb', dest='rg_library', help='Read Group Library' )
b0585923decc Uploaded
devteam
parents:
diff changeset
414 op.add_option( '', '--rg-pl', dest='rg_platform', help='Read Group platform (e.g. illumina, solid)' )
b0585923decc Uploaded
devteam
parents:
diff changeset
415 op.add_option( '', '--rg-pu', dest='rg_plat_unit', help='Read Group platform unit (eg. run barcode) ' )
b0585923decc Uploaded
devteam
parents:
diff changeset
416 op.add_option( '', '--rg-sm', dest='rg_sample', help='Read Group sample name' )
b0585923decc Uploaded
devteam
parents:
diff changeset
417 op.add_option( '', '--rg-id', dest='rg_id', help='Read Group ID' )
b0585923decc Uploaded
devteam
parents:
diff changeset
418 op.add_option( '', '--rg-cn', dest='rg_seq_center', help='Read Group sequencing center name' )
b0585923decc Uploaded
devteam
parents:
diff changeset
419 op.add_option( '', '--rg-ds', dest='rg_desc', help='Read Group description' )
b0585923decc Uploaded
devteam
parents:
diff changeset
420 # ReorderSam
b0585923decc Uploaded
devteam
parents:
diff changeset
421 op.add_option( '', '--allow-inc-dict-concord', dest='allow_inc_dict_concord', help='Allow incomplete dict concordance' )
b0585923decc Uploaded
devteam
parents:
diff changeset
422 op.add_option( '', '--allow-contig-len-discord', dest='allow_contig_len_discord', help='Allow contig length discordance' )
b0585923decc Uploaded
devteam
parents:
diff changeset
423 # ReplaceSamHeader
b0585923decc Uploaded
devteam
parents:
diff changeset
424 op.add_option( '', '--header-file', dest='header_file', help='sam or bam file from which header will be read' )
b0585923decc Uploaded
devteam
parents:
diff changeset
425
b0585923decc Uploaded
devteam
parents:
diff changeset
426 op.add_option('','--assumesorted', default='true')
b0585923decc Uploaded
devteam
parents:
diff changeset
427 op.add_option('','--readregex', default="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*")
b0585923decc Uploaded
devteam
parents:
diff changeset
428 #estimatelibrarycomplexity
b0585923decc Uploaded
devteam
parents:
diff changeset
429 op.add_option('','--minid', default="5")
b0585923decc Uploaded
devteam
parents:
diff changeset
430 op.add_option('','--maxdiff', default="0.03")
b0585923decc Uploaded
devteam
parents:
diff changeset
431 op.add_option('','--minmeanq', default="20")
b0585923decc Uploaded
devteam
parents:
diff changeset
432 #hsmetrics
b0585923decc Uploaded
devteam
parents:
diff changeset
433 op.add_option('','--baitbed', default=None)
b0585923decc Uploaded
devteam
parents:
diff changeset
434 op.add_option('','--targetbed', default=None)
b0585923decc Uploaded
devteam
parents:
diff changeset
435 #validate
b0585923decc Uploaded
devteam
parents:
diff changeset
436 op.add_option('','--ignoreflags', action='append', type="string")
b0585923decc Uploaded
devteam
parents:
diff changeset
437 op.add_option('','--maxerrors', default=None)
b0585923decc Uploaded
devteam
parents:
diff changeset
438 op.add_option('','--datatype', default=None)
b0585923decc Uploaded
devteam
parents:
diff changeset
439 op.add_option('','--bamout', default=None)
b0585923decc Uploaded
devteam
parents:
diff changeset
440 op.add_option('','--samout', default=None)
b0585923decc Uploaded
devteam
parents:
diff changeset
441
b0585923decc Uploaded
devteam
parents:
diff changeset
442 opts, args = op.parse_args()
b0585923decc Uploaded
devteam
parents:
diff changeset
443 opts.sortme = opts.assumesorted == 'false'
b0585923decc Uploaded
devteam
parents:
diff changeset
444 assert opts.input <> None
b0585923decc Uploaded
devteam
parents:
diff changeset
445 # need to add
b0585923decc Uploaded
devteam
parents:
diff changeset
446 # instance that does all the work
b0585923decc Uploaded
devteam
parents:
diff changeset
447 pic = PicardBase(opts,sys.argv[0])
b0585923decc Uploaded
devteam
parents:
diff changeset
448
b0585923decc Uploaded
devteam
parents:
diff changeset
449 tmp_dir = opts.outdir
b0585923decc Uploaded
devteam
parents:
diff changeset
450 haveTempout = False # we use this where sam output is an option
b0585923decc Uploaded
devteam
parents:
diff changeset
451 rval = 0
b0585923decc Uploaded
devteam
parents:
diff changeset
452 stdouts = 'Not run yet'
b0585923decc Uploaded
devteam
parents:
diff changeset
453 # set ref and dict files to use (create if necessary)
b0585923decc Uploaded
devteam
parents:
diff changeset
454 ref_file_name = opts.ref
b0585923decc Uploaded
devteam
parents:
diff changeset
455 if opts.ref_file <> None:
b0585923decc Uploaded
devteam
parents:
diff changeset
456 csd = 'CreateSequenceDictionary'
b0585923decc Uploaded
devteam
parents:
diff changeset
457 realjarpath = os.path.split(opts.jar)[0]
b0585923decc Uploaded
devteam
parents:
diff changeset
458 jarpath = os.path.join(realjarpath,'%s.jar' % csd) # for refseq
b0585923decc Uploaded
devteam
parents:
diff changeset
459 tmp_ref_fd, tmp_ref_name = tempfile.mkstemp( dir=opts.tmpdir , prefix = pic.picname)
b0585923decc Uploaded
devteam
parents:
diff changeset
460 ref_file_name = '%s.fasta' % tmp_ref_name
b0585923decc Uploaded
devteam
parents:
diff changeset
461 # build dict
b0585923decc Uploaded
devteam
parents:
diff changeset
462 dict_file_name = '%s.dict' % tmp_ref_name
b0585923decc Uploaded
devteam
parents:
diff changeset
463 os.symlink( opts.ref_file, ref_file_name )
b0585923decc Uploaded
devteam
parents:
diff changeset
464 cl = ['REFERENCE=%s' % ref_file_name]
b0585923decc Uploaded
devteam
parents:
diff changeset
465 cl.append('OUTPUT=%s' % dict_file_name)
b0585923decc Uploaded
devteam
parents:
diff changeset
466 cl.append('URI=%s' % os.path.basename( opts.ref_file ))
b0585923decc Uploaded
devteam
parents:
diff changeset
467 cl.append('TRUNCATE_NAMES_AT_WHITESPACE=%s' % opts.trunc_names)
b0585923decc Uploaded
devteam
parents:
diff changeset
468 if opts.species_name:
b0585923decc Uploaded
devteam
parents:
diff changeset
469 cl.append('SPECIES=%s' % opts.species_name)
b0585923decc Uploaded
devteam
parents:
diff changeset
470 if opts.build_name:
b0585923decc Uploaded
devteam
parents:
diff changeset
471 cl.append('GENOME_ASSEMBLY=%s' % opts.build_name)
b0585923decc Uploaded
devteam
parents:
diff changeset
472 pic.delme.append(dict_file_name)
b0585923decc Uploaded
devteam
parents:
diff changeset
473 pic.delme.append(ref_file_name)
b0585923decc Uploaded
devteam
parents:
diff changeset
474 pic.delme.append(tmp_ref_name)
b0585923decc Uploaded
devteam
parents:
diff changeset
475 stdouts,rval = pic.runPic(jarpath, cl)
b0585923decc Uploaded
devteam
parents:
diff changeset
476 # run relevant command(s)
b0585923decc Uploaded
devteam
parents:
diff changeset
477
b0585923decc Uploaded
devteam
parents:
diff changeset
478 # define temporary output
b0585923decc Uploaded
devteam
parents:
diff changeset
479 # if output is sam, it must have that extension, otherwise bam will be produced
b0585923decc Uploaded
devteam
parents:
diff changeset
480 # specify sam or bam file with extension
b0585923decc Uploaded
devteam
parents:
diff changeset
481 if opts.output_format == 'sam':
b0585923decc Uploaded
devteam
parents:
diff changeset
482 suff = '.sam'
b0585923decc Uploaded
devteam
parents:
diff changeset
483 else:
b0585923decc Uploaded
devteam
parents:
diff changeset
484 suff = ''
b0585923decc Uploaded
devteam
parents:
diff changeset
485 tmp_fd, tempout = tempfile.mkstemp( dir=opts.tmpdir, suffix=suff )
b0585923decc Uploaded
devteam
parents:
diff changeset
486
b0585923decc Uploaded
devteam
parents:
diff changeset
487 cl = ['VALIDATION_STRINGENCY=LENIENT',]
b0585923decc Uploaded
devteam
parents:
diff changeset
488
b0585923decc Uploaded
devteam
parents:
diff changeset
489 if pic.picname == 'AddOrReplaceReadGroups':
b0585923decc Uploaded
devteam
parents:
diff changeset
490 # sort order to match Galaxy's default
b0585923decc Uploaded
devteam
parents:
diff changeset
491 cl.append('SORT_ORDER=coordinate')
b0585923decc Uploaded
devteam
parents:
diff changeset
492 # input
b0585923decc Uploaded
devteam
parents:
diff changeset
493 cl.append('INPUT=%s' % opts.input)
b0585923decc Uploaded
devteam
parents:
diff changeset
494 # outputs
b0585923decc Uploaded
devteam
parents:
diff changeset
495 cl.append('OUTPUT=%s' % tempout)
b0585923decc Uploaded
devteam
parents:
diff changeset
496 # required read groups
b0585923decc Uploaded
devteam
parents:
diff changeset
497 cl.append('RGLB="%s"' % opts.rg_library)
b0585923decc Uploaded
devteam
parents:
diff changeset
498 cl.append('RGPL="%s"' % opts.rg_platform)
b0585923decc Uploaded
devteam
parents:
diff changeset
499 cl.append('RGPU="%s"' % opts.rg_plat_unit)
b0585923decc Uploaded
devteam
parents:
diff changeset
500 cl.append('RGSM="%s"' % opts.rg_sample)
b0585923decc Uploaded
devteam
parents:
diff changeset
501 if opts.rg_id:
b0585923decc Uploaded
devteam
parents:
diff changeset
502 cl.append('RGID="%s"' % opts.rg_id)
b0585923decc Uploaded
devteam
parents:
diff changeset
503 # optional read groups
b0585923decc Uploaded
devteam
parents:
diff changeset
504 if opts.rg_seq_center:
b0585923decc Uploaded
devteam
parents:
diff changeset
505 cl.append('RGCN="%s"' % opts.rg_seq_center)
b0585923decc Uploaded
devteam
parents:
diff changeset
506 if opts.rg_desc:
b0585923decc Uploaded
devteam
parents:
diff changeset
507 cl.append('RGDS="%s"' % opts.rg_desc)
b0585923decc Uploaded
devteam
parents:
diff changeset
508 stdouts,rval = pic.runPic(opts.jar, cl)
b0585923decc Uploaded
devteam
parents:
diff changeset
509 haveTempout = True
b0585923decc Uploaded
devteam
parents:
diff changeset
510
b0585923decc Uploaded
devteam
parents:
diff changeset
511 elif pic.picname == 'BamIndexStats':
b0585923decc Uploaded
devteam
parents:
diff changeset
512 tmp_fd, tmp_name = tempfile.mkstemp( dir=tmp_dir )
b0585923decc Uploaded
devteam
parents:
diff changeset
513 tmp_bam_name = '%s.bam' % tmp_name
b0585923decc Uploaded
devteam
parents:
diff changeset
514 tmp_bai_name = '%s.bai' % tmp_bam_name
b0585923decc Uploaded
devteam
parents:
diff changeset
515 os.symlink( opts.input, tmp_bam_name )
b0585923decc Uploaded
devteam
parents:
diff changeset
516 os.symlink( opts.bai_file, tmp_bai_name )
b0585923decc Uploaded
devteam
parents:
diff changeset
517 cl.append('INPUT=%s' % ( tmp_bam_name ))
b0585923decc Uploaded
devteam
parents:
diff changeset
518 pic.delme.append(tmp_bam_name)
b0585923decc Uploaded
devteam
parents:
diff changeset
519 pic.delme.append(tmp_bai_name)
b0585923decc Uploaded
devteam
parents:
diff changeset
520 pic.delme.append(tmp_name)
b0585923decc Uploaded
devteam
parents:
diff changeset
521 stdouts,rval = pic.runPic( opts.jar, cl )
b0585923decc Uploaded
devteam
parents:
diff changeset
522 f = open(pic.metricsOut,'a')
b0585923decc Uploaded
devteam
parents:
diff changeset
523 f.write(stdouts) # got this on stdout from runCl
b0585923decc Uploaded
devteam
parents:
diff changeset
524 f.write('\n')
b0585923decc Uploaded
devteam
parents:
diff changeset
525 f.close()
b0585923decc Uploaded
devteam
parents:
diff changeset
526 doTranspose = False # but not transposed
b0585923decc Uploaded
devteam
parents:
diff changeset
527
b0585923decc Uploaded
devteam
parents:
diff changeset
528 elif pic.picname == 'EstimateLibraryComplexity':
b0585923decc Uploaded
devteam
parents:
diff changeset
529 cl.append('I=%s' % opts.input)
b0585923decc Uploaded
devteam
parents:
diff changeset
530 cl.append('O=%s' % pic.metricsOut)
b0585923decc Uploaded
devteam
parents:
diff changeset
531 if float(opts.minid) > 0:
b0585923decc Uploaded
devteam
parents:
diff changeset
532 cl.append('MIN_IDENTICAL_BASES=%s' % opts.minid)
b0585923decc Uploaded
devteam
parents:
diff changeset
533 if float(opts.maxdiff) > 0.0:
b0585923decc Uploaded
devteam
parents:
diff changeset
534 cl.append('MAX_DIFF_RATE=%s' % opts.maxdiff)
b0585923decc Uploaded
devteam
parents:
diff changeset
535 if float(opts.minmeanq) > 0:
b0585923decc Uploaded
devteam
parents:
diff changeset
536 cl.append('MIN_MEAN_QUALITY=%s' % opts.minmeanq)
b0585923decc Uploaded
devteam
parents:
diff changeset
537 if opts.readregex > '':
b0585923decc Uploaded
devteam
parents:
diff changeset
538 cl.append('READ_NAME_REGEX="%s"' % opts.readregex)
b0585923decc Uploaded
devteam
parents:
diff changeset
539 if float(opts.optdupdist) > 0:
b0585923decc Uploaded
devteam
parents:
diff changeset
540 cl.append('OPTICAL_DUPLICATE_PIXEL_DISTANCE=%s' % opts.optdupdist)
b0585923decc Uploaded
devteam
parents:
diff changeset
541 stdouts,rval = pic.runPic(opts.jar, cl)
b0585923decc Uploaded
devteam
parents:
diff changeset
542
b0585923decc Uploaded
devteam
parents:
diff changeset
543 elif pic.picname == 'CollectAlignmentSummaryMetrics':
b0585923decc Uploaded
devteam
parents:
diff changeset
544 # Why do we do this fakefasta thing?
b0585923decc Uploaded
devteam
parents:
diff changeset
545 # Because we need NO fai to be available or picard barfs unless it matches the input data.
b0585923decc Uploaded
devteam
parents:
diff changeset
546 # why? Dunno Seems to work without complaining if the .bai file is AWOL....
b0585923decc Uploaded
devteam
parents:
diff changeset
547 fakefasta = os.path.join(opts.outdir,'%s_fake.fasta' % os.path.basename(ref_file_name))
b0585923decc Uploaded
devteam
parents:
diff changeset
548 try:
b0585923decc Uploaded
devteam
parents:
diff changeset
549 os.symlink(ref_file_name,fakefasta)
b0585923decc Uploaded
devteam
parents:
diff changeset
550 except:
b0585923decc Uploaded
devteam
parents:
diff changeset
551 s = '## unable to symlink %s to %s - different devices? Will shutil.copy'
b0585923decc Uploaded
devteam
parents:
diff changeset
552 info = s
b0585923decc Uploaded
devteam
parents:
diff changeset
553 shutil.copy(ref_file_name,fakefasta)
b0585923decc Uploaded
devteam
parents:
diff changeset
554 pic.delme.append(fakefasta)
b0585923decc Uploaded
devteam
parents:
diff changeset
555 cl.append('ASSUME_SORTED=true')
b0585923decc Uploaded
devteam
parents:
diff changeset
556 adaptlist = opts.adaptors.split(',')
b0585923decc Uploaded
devteam
parents:
diff changeset
557 adaptorseqs = ['ADAPTER_SEQUENCE=%s' % x for x in adaptlist]
b0585923decc Uploaded
devteam
parents:
diff changeset
558 cl += adaptorseqs
b0585923decc Uploaded
devteam
parents:
diff changeset
559 cl.append('IS_BISULFITE_SEQUENCED=%s' % opts.bisulphite)
b0585923decc Uploaded
devteam
parents:
diff changeset
560 cl.append('MAX_INSERT_SIZE=%s' % opts.maxinsert)
b0585923decc Uploaded
devteam
parents:
diff changeset
561 cl.append('OUTPUT=%s' % pic.metricsOut)
b0585923decc Uploaded
devteam
parents:
diff changeset
562 cl.append('R=%s' % fakefasta)
b0585923decc Uploaded
devteam
parents:
diff changeset
563 cl.append('TMP_DIR=%s' % opts.tmpdir)
b0585923decc Uploaded
devteam
parents:
diff changeset
564 if not opts.assumesorted.lower() == 'true': # we need to sort input
b0585923decc Uploaded
devteam
parents:
diff changeset
565 sortedfile = '%s.sorted' % os.path.basename(opts.input)
b0585923decc Uploaded
devteam
parents:
diff changeset
566 if opts.datatype == 'sam': # need to work with a bam
b0585923decc Uploaded
devteam
parents:
diff changeset
567 tlog,tempbam,trval = pic.samToBam(opts.input,opts.outdir)
b0585923decc Uploaded
devteam
parents:
diff changeset
568 pic.delme.append(tempbam)
b0585923decc Uploaded
devteam
parents:
diff changeset
569 try:
b0585923decc Uploaded
devteam
parents:
diff changeset
570 tlog = pic.sortSam(tempbam,sortedfile,opts.outdir)
b0585923decc Uploaded
devteam
parents:
diff changeset
571 except:
b0585923decc Uploaded
devteam
parents:
diff changeset
572 print '## exception on sorting sam file %s' % opts.input
b0585923decc Uploaded
devteam
parents:
diff changeset
573 else: # is already bam
b0585923decc Uploaded
devteam
parents:
diff changeset
574 try:
b0585923decc Uploaded
devteam
parents:
diff changeset
575 tlog = pic.sortSam(opts.input,sortedfile,opts.outdir)
b0585923decc Uploaded
devteam
parents:
diff changeset
576 except : # bug - [bam_sort_core] not being ignored - TODO fixme
b0585923decc Uploaded
devteam
parents:
diff changeset
577 print '## exception %s on sorting bam file %s' % (sys.exc_info()[0],opts.input)
b0585923decc Uploaded
devteam
parents:
diff changeset
578 cl.append('INPUT=%s.bam' % os.path.abspath(os.path.join(opts.outdir,sortedfile)))
b0585923decc Uploaded
devteam
parents:
diff changeset
579 pic.delme.append(os.path.join(opts.outdir,sortedfile))
b0585923decc Uploaded
devteam
parents:
diff changeset
580 else:
b0585923decc Uploaded
devteam
parents:
diff changeset
581 cl.append('INPUT=%s' % os.path.abspath(opts.input))
b0585923decc Uploaded
devteam
parents:
diff changeset
582 stdouts,rval = pic.runPic(opts.jar, cl)
b0585923decc Uploaded
devteam
parents:
diff changeset
583
b0585923decc Uploaded
devteam
parents:
diff changeset
584
b0585923decc Uploaded
devteam
parents:
diff changeset
585 elif pic.picname == 'CollectGcBiasMetrics':
b0585923decc Uploaded
devteam
parents:
diff changeset
586 assert os.path.isfile(ref_file_name),'PicardGC needs a reference sequence - cannot read %s' % ref_file_name
b0585923decc Uploaded
devteam
parents:
diff changeset
587 # sigh. Why do we do this fakefasta thing? Because we need NO fai to be available or picard barfs unless it has the same length as the input data.
b0585923decc Uploaded
devteam
parents:
diff changeset
588 # why? Dunno
b0585923decc Uploaded
devteam
parents:
diff changeset
589 fakefasta = os.path.join(opts.outdir,'%s_fake.fasta' % os.path.basename(ref_file_name))
b0585923decc Uploaded
devteam
parents:
diff changeset
590 try:
b0585923decc Uploaded
devteam
parents:
diff changeset
591 os.symlink(ref_file_name,fakefasta)
b0585923decc Uploaded
devteam
parents:
diff changeset
592 except:
b0585923decc Uploaded
devteam
parents:
diff changeset
593 s = '## unable to symlink %s to %s - different devices? May need to replace with shutil.copy'
b0585923decc Uploaded
devteam
parents:
diff changeset
594 info = s
b0585923decc Uploaded
devteam
parents:
diff changeset
595 shutil.copy(ref_file_name,fakefasta)
b0585923decc Uploaded
devteam
parents:
diff changeset
596 pic.delme.append(fakefasta)
b0585923decc Uploaded
devteam
parents:
diff changeset
597 x = 'rgPicardGCBiasMetrics'
b0585923decc Uploaded
devteam
parents:
diff changeset
598 pdfname = '%s.pdf' % x
b0585923decc Uploaded
devteam
parents:
diff changeset
599 jpgname = '%s.jpg' % x
b0585923decc Uploaded
devteam
parents:
diff changeset
600 tempout = os.path.join(opts.outdir,'rgPicardGCBiasMetrics.out')
b0585923decc Uploaded
devteam
parents:
diff changeset
601 temppdf = os.path.join(opts.outdir,pdfname)
b0585923decc Uploaded
devteam
parents:
diff changeset
602 cl.append('R=%s' % fakefasta)
b0585923decc Uploaded
devteam
parents:
diff changeset
603 cl.append('WINDOW_SIZE=%s' % opts.windowsize)
b0585923decc Uploaded
devteam
parents:
diff changeset
604 cl.append('MINIMUM_GENOME_FRACTION=%s' % opts.mingenomefrac)
b0585923decc Uploaded
devteam
parents:
diff changeset
605 cl.append('INPUT=%s' % opts.input)
b0585923decc Uploaded
devteam
parents:
diff changeset
606 cl.append('OUTPUT=%s' % tempout)
b0585923decc Uploaded
devteam
parents:
diff changeset
607 cl.append('TMP_DIR=%s' % opts.tmpdir)
b0585923decc Uploaded
devteam
parents:
diff changeset
608 cl.append('CHART_OUTPUT=%s' % temppdf)
b0585923decc Uploaded
devteam
parents:
diff changeset
609 cl.append('SUMMARY_OUTPUT=%s' % pic.metricsOut)
b0585923decc Uploaded
devteam
parents:
diff changeset
610 stdouts,rval = pic.runPic(opts.jar, cl)
b0585923decc Uploaded
devteam
parents:
diff changeset
611 if os.path.isfile(temppdf):
b0585923decc Uploaded
devteam
parents:
diff changeset
612 cl2 = ['convert','-resize x400',temppdf,os.path.join(opts.outdir,jpgname)] # make the jpg for fixPicardOutputs to find
b0585923decc Uploaded
devteam
parents:
diff changeset
613 s,stdouts,rval = pic.runCL(cl=cl2,output_dir=opts.outdir)
b0585923decc Uploaded
devteam
parents:
diff changeset
614 else:
b0585923decc Uploaded
devteam
parents:
diff changeset
615 s='### runGC: Unable to find pdf %s - please check the log for the causal problem\n' % temppdf
b0585923decc Uploaded
devteam
parents:
diff changeset
616 lf = open(pic.log_filename,'a')
b0585923decc Uploaded
devteam
parents:
diff changeset
617 lf.write(s)
b0585923decc Uploaded
devteam
parents:
diff changeset
618 lf.write('\n')
b0585923decc Uploaded
devteam
parents:
diff changeset
619 lf.close()
b0585923decc Uploaded
devteam
parents:
diff changeset
620
b0585923decc Uploaded
devteam
parents:
diff changeset
621 elif pic.picname == 'CollectInsertSizeMetrics':
b0585923decc Uploaded
devteam
parents:
diff changeset
622 """ <command interpreter="python">
b0585923decc Uploaded
devteam
parents:
diff changeset
623 picard_wrapper.py -i "$input_file" -n "$out_prefix" --tmpdir "${__new_file_path__}" --deviations "$deviations"
b0585923decc Uploaded
devteam
parents:
diff changeset
624 --histwidth "$histWidth" --minpct "$minPct" --malevel "$malevel"
b0585923decc Uploaded
devteam
parents:
diff changeset
625 -j "${GALAXY_DATA_INDEX_DIR}/shared/jars/picard/CollectInsertSizeMetrics.jar" -d "$html_file.files_path" -t "$html_file"
b0585923decc Uploaded
devteam
parents:
diff changeset
626 </command>
b0585923decc Uploaded
devteam
parents:
diff changeset
627 """
b0585923decc Uploaded
devteam
parents:
diff changeset
628 isPDF = 'InsertSizeHist.pdf'
b0585923decc Uploaded
devteam
parents:
diff changeset
629 pdfpath = os.path.join(opts.outdir,isPDF)
b0585923decc Uploaded
devteam
parents:
diff changeset
630 histpdf = 'InsertSizeHist.pdf'
b0585923decc Uploaded
devteam
parents:
diff changeset
631 cl.append('I=%s' % opts.input)
b0585923decc Uploaded
devteam
parents:
diff changeset
632 cl.append('O=%s' % pic.metricsOut)
b0585923decc Uploaded
devteam
parents:
diff changeset
633 cl.append('HISTOGRAM_FILE=%s' % histpdf)
b0585923decc Uploaded
devteam
parents:
diff changeset
634 #if opts.taillimit <> '0': # this was deprecated although still mentioned in the docs at 1.56
b0585923decc Uploaded
devteam
parents:
diff changeset
635 # cl.append('TAIL_LIMIT=%s' % opts.taillimit)
b0585923decc Uploaded
devteam
parents:
diff changeset
636 if opts.histwidth <> '0':
b0585923decc Uploaded
devteam
parents:
diff changeset
637 cl.append('HISTOGRAM_WIDTH=%s' % opts.histwidth)
b0585923decc Uploaded
devteam
parents:
diff changeset
638 if float( opts.minpct) > 0.0:
b0585923decc Uploaded
devteam
parents:
diff changeset
639 cl.append('MINIMUM_PCT=%s' % opts.minpct)
b0585923decc Uploaded
devteam
parents:
diff changeset
640 if float(opts.deviations) > 0.0:
b0585923decc Uploaded
devteam
parents:
diff changeset
641 cl.append('DEVIATIONS=%s' % opts.deviations)
b0585923decc Uploaded
devteam
parents:
diff changeset
642 if opts.malevel:
b0585923decc Uploaded
devteam
parents:
diff changeset
643 malists = opts.malevel.split(',')
b0585923decc Uploaded
devteam
parents:
diff changeset
644 malist = ['METRIC_ACCUMULATION_LEVEL=%s' % x for x in malists]
b0585923decc Uploaded
devteam
parents:
diff changeset
645 cl += malist
b0585923decc Uploaded
devteam
parents:
diff changeset
646 stdouts,rval = pic.runPic(opts.jar, cl)
b0585923decc Uploaded
devteam
parents:
diff changeset
647 if os.path.exists(pdfpath): # automake thumbnail - will be added to html
b0585923decc Uploaded
devteam
parents:
diff changeset
648 cl2 = ['mogrify', '-format jpg -resize x400 %s' % pdfpath]
b0585923decc Uploaded
devteam
parents:
diff changeset
649 pic.runCL(cl=cl2,output_dir=opts.outdir)
b0585923decc Uploaded
devteam
parents:
diff changeset
650 else:
b0585923decc Uploaded
devteam
parents:
diff changeset
651 s = 'Unable to find expected pdf file %s<br/>\n' % pdfpath
b0585923decc Uploaded
devteam
parents:
diff changeset
652 s += 'This <b>always happens if single ended data was provided</b> to this tool,\n'
b0585923decc Uploaded
devteam
parents:
diff changeset
653 s += 'so please double check that your input data really is paired-end NGS data.<br/>\n'
b0585923decc Uploaded
devteam
parents:
diff changeset
654 s += 'If your input was paired data this may be a bug worth reporting to the galaxy-bugs list\n<br/>'
b0585923decc Uploaded
devteam
parents:
diff changeset
655 logging.info(s)
b0585923decc Uploaded
devteam
parents:
diff changeset
656 if len(stdouts) > 0:
b0585923decc Uploaded
devteam
parents:
diff changeset
657 logging.info(stdouts)
b0585923decc Uploaded
devteam
parents:
diff changeset
658
b0585923decc Uploaded
devteam
parents:
diff changeset
659 elif pic.picname == 'MarkDuplicates':
b0585923decc Uploaded
devteam
parents:
diff changeset
660 # assume sorted even if header says otherwise
b0585923decc Uploaded
devteam
parents:
diff changeset
661 cl.append('ASSUME_SORTED=%s' % (opts.assumesorted))
b0585923decc Uploaded
devteam
parents:
diff changeset
662 # input
b0585923decc Uploaded
devteam
parents:
diff changeset
663 cl.append('INPUT=%s' % opts.input)
b0585923decc Uploaded
devteam
parents:
diff changeset
664 # outputs
b0585923decc Uploaded
devteam
parents:
diff changeset
665 cl.append('OUTPUT=%s' % opts.output)
b0585923decc Uploaded
devteam
parents:
diff changeset
666 cl.append('METRICS_FILE=%s' % pic.metricsOut )
b0585923decc Uploaded
devteam
parents:
diff changeset
667 # remove or mark duplicates
b0585923decc Uploaded
devteam
parents:
diff changeset
668 cl.append('REMOVE_DUPLICATES=%s' % opts.remdups)
b0585923decc Uploaded
devteam
parents:
diff changeset
669 # the regular expression to be used to parse reads in incoming SAM file
b0585923decc Uploaded
devteam
parents:
diff changeset
670 cl.append('READ_NAME_REGEX="%s"' % opts.readregex)
b0585923decc Uploaded
devteam
parents:
diff changeset
671 # maximum offset between two duplicate clusters
b0585923decc Uploaded
devteam
parents:
diff changeset
672 cl.append('OPTICAL_DUPLICATE_PIXEL_DISTANCE=%s' % opts.optdupdist)
b0585923decc Uploaded
devteam
parents:
diff changeset
673 stdouts,rval = pic.runPic(opts.jar, cl)
b0585923decc Uploaded
devteam
parents:
diff changeset
674
b0585923decc Uploaded
devteam
parents:
diff changeset
675 elif pic.picname == 'FixMateInformation':
b0585923decc Uploaded
devteam
parents:
diff changeset
676 cl.append('I=%s' % opts.input)
b0585923decc Uploaded
devteam
parents:
diff changeset
677 cl.append('O=%s' % tempout)
b0585923decc Uploaded
devteam
parents:
diff changeset
678 cl.append('SORT_ORDER=%s' % opts.sortorder)
b0585923decc Uploaded
devteam
parents:
diff changeset
679 stdouts,rval = pic.runPic(opts.jar,cl)
b0585923decc Uploaded
devteam
parents:
diff changeset
680 haveTempout = True
b0585923decc Uploaded
devteam
parents:
diff changeset
681
b0585923decc Uploaded
devteam
parents:
diff changeset
682 elif pic.picname == 'ReorderSam':
b0585923decc Uploaded
devteam
parents:
diff changeset
683 # input
b0585923decc Uploaded
devteam
parents:
diff changeset
684 cl.append('INPUT=%s' % opts.input)
b0585923decc Uploaded
devteam
parents:
diff changeset
685 # output
b0585923decc Uploaded
devteam
parents:
diff changeset
686 cl.append('OUTPUT=%s' % tempout)
b0585923decc Uploaded
devteam
parents:
diff changeset
687 # reference
b0585923decc Uploaded
devteam
parents:
diff changeset
688 cl.append('REFERENCE=%s' % ref_file_name)
b0585923decc Uploaded
devteam
parents:
diff changeset
689 # incomplete dict concordance
b0585923decc Uploaded
devteam
parents:
diff changeset
690 if opts.allow_inc_dict_concord == 'true':
b0585923decc Uploaded
devteam
parents:
diff changeset
691 cl.append('ALLOW_INCOMPLETE_DICT_CONCORDANCE=true')
b0585923decc Uploaded
devteam
parents:
diff changeset
692 # contig length discordance
b0585923decc Uploaded
devteam
parents:
diff changeset
693 if opts.allow_contig_len_discord == 'true':
b0585923decc Uploaded
devteam
parents:
diff changeset
694 cl.append('ALLOW_CONTIG_LENGTH_DISCORDANCE=true')
b0585923decc Uploaded
devteam
parents:
diff changeset
695 stdouts,rval = pic.runPic(opts.jar, cl)
b0585923decc Uploaded
devteam
parents:
diff changeset
696 haveTempout = True
b0585923decc Uploaded
devteam
parents:
diff changeset
697
b0585923decc Uploaded
devteam
parents:
diff changeset
698 elif pic.picname == 'ReplaceSamHeader':
b0585923decc Uploaded
devteam
parents:
diff changeset
699 cl.append('INPUT=%s' % opts.input)
b0585923decc Uploaded
devteam
parents:
diff changeset
700 cl.append('OUTPUT=%s' % tempout)
b0585923decc Uploaded
devteam
parents:
diff changeset
701 cl.append('HEADER=%s' % opts.header_file)
b0585923decc Uploaded
devteam
parents:
diff changeset
702 stdouts,rval = pic.runPic(opts.jar, cl)
b0585923decc Uploaded
devteam
parents:
diff changeset
703 haveTempout = True
b0585923decc Uploaded
devteam
parents:
diff changeset
704
b0585923decc Uploaded
devteam
parents:
diff changeset
705 elif pic.picname == 'CalculateHsMetrics':
b0585923decc Uploaded
devteam
parents:
diff changeset
706 maxloglines = 100
b0585923decc Uploaded
devteam
parents:
diff changeset
707 baitfname = os.path.join(opts.outdir,'rgPicardHsMetrics.bait')
b0585923decc Uploaded
devteam
parents:
diff changeset
708 targetfname = os.path.join(opts.outdir,'rgPicardHsMetrics.target')
b0585923decc Uploaded
devteam
parents:
diff changeset
709 baitf = pic.makePicInterval(opts.baitbed,baitfname)
b0585923decc Uploaded
devteam
parents:
diff changeset
710 if opts.targetbed == opts.baitbed: # same file sometimes
b0585923decc Uploaded
devteam
parents:
diff changeset
711 targetf = baitf
b0585923decc Uploaded
devteam
parents:
diff changeset
712 else:
b0585923decc Uploaded
devteam
parents:
diff changeset
713 targetf = pic.makePicInterval(opts.targetbed,targetfname)
b0585923decc Uploaded
devteam
parents:
diff changeset
714 cl.append('BAIT_INTERVALS=%s' % baitf)
b0585923decc Uploaded
devteam
parents:
diff changeset
715 cl.append('TARGET_INTERVALS=%s' % targetf)
b0585923decc Uploaded
devteam
parents:
diff changeset
716 cl.append('INPUT=%s' % os.path.abspath(opts.input))
b0585923decc Uploaded
devteam
parents:
diff changeset
717 cl.append('OUTPUT=%s' % pic.metricsOut)
b0585923decc Uploaded
devteam
parents:
diff changeset
718 cl.append('TMP_DIR=%s' % opts.tmpdir)
b0585923decc Uploaded
devteam
parents:
diff changeset
719 stdouts,rval = pic.runPic(opts.jar,cl)
b0585923decc Uploaded
devteam
parents:
diff changeset
720
b0585923decc Uploaded
devteam
parents:
diff changeset
721 elif pic.picname == 'ValidateSamFile':
b0585923decc Uploaded
devteam
parents:
diff changeset
722 import pysam
b0585923decc Uploaded
devteam
parents:
diff changeset
723 doTranspose = False
b0585923decc Uploaded
devteam
parents:
diff changeset
724 sortedfile = os.path.join(opts.outdir,'rgValidate.sorted')
b0585923decc Uploaded
devteam
parents:
diff changeset
725 stf = open(pic.log_filename,'w')
b0585923decc Uploaded
devteam
parents:
diff changeset
726 tlog = None
b0585923decc Uploaded
devteam
parents:
diff changeset
727 if opts.datatype == 'sam': # need to work with a bam
b0585923decc Uploaded
devteam
parents:
diff changeset
728 tlog,tempbam,rval = pic.samToBam(opts.input,opts.outdir)
b0585923decc Uploaded
devteam
parents:
diff changeset
729 try:
b0585923decc Uploaded
devteam
parents:
diff changeset
730 tlog = pic.sortSam(tempbam,sortedfile,opts.outdir)
b0585923decc Uploaded
devteam
parents:
diff changeset
731 except:
b0585923decc Uploaded
devteam
parents:
diff changeset
732 print '## exception on sorting sam file %s' % opts.input
b0585923decc Uploaded
devteam
parents:
diff changeset
733 else: # is already bam
b0585923decc Uploaded
devteam
parents:
diff changeset
734 try:
b0585923decc Uploaded
devteam
parents:
diff changeset
735 tlog = pic.sortSam(opts.input,sortedfile,opts.outdir)
b0585923decc Uploaded
devteam
parents:
diff changeset
736 except: # bug - [bam_sort_core] not being ignored - TODO fixme
b0585923decc Uploaded
devteam
parents:
diff changeset
737 print '## exception on sorting bam file %s' % opts.input
b0585923decc Uploaded
devteam
parents:
diff changeset
738 if tlog:
b0585923decc Uploaded
devteam
parents:
diff changeset
739 print '##tlog=',tlog
b0585923decc Uploaded
devteam
parents:
diff changeset
740 stf.write(tlog)
b0585923decc Uploaded
devteam
parents:
diff changeset
741 stf.write('\n')
b0585923decc Uploaded
devteam
parents:
diff changeset
742 sortedfile = '%s.bam' % sortedfile # samtools does that
b0585923decc Uploaded
devteam
parents:
diff changeset
743 cl.append('O=%s' % pic.metricsOut)
b0585923decc Uploaded
devteam
parents:
diff changeset
744 cl.append('TMP_DIR=%s' % opts.tmpdir)
b0585923decc Uploaded
devteam
parents:
diff changeset
745 cl.append('I=%s' % sortedfile)
b0585923decc Uploaded
devteam
parents:
diff changeset
746 opts.maxerrors = '99999999'
b0585923decc Uploaded
devteam
parents:
diff changeset
747 cl.append('MAX_OUTPUT=%s' % opts.maxerrors)
b0585923decc Uploaded
devteam
parents:
diff changeset
748 if opts.ignoreflags[0] <> 'None': # picard error values to ignore
b0585923decc Uploaded
devteam
parents:
diff changeset
749 igs = ['IGNORE=%s' % x for x in opts.ignoreflags if x <> 'None']
b0585923decc Uploaded
devteam
parents:
diff changeset
750 cl.append(' '.join(igs))
b0585923decc Uploaded
devteam
parents:
diff changeset
751 if opts.bisulphite.lower() <> 'false':
b0585923decc Uploaded
devteam
parents:
diff changeset
752 cl.append('IS_BISULFITE_SEQUENCED=true')
b0585923decc Uploaded
devteam
parents:
diff changeset
753 if opts.ref <> None or opts.ref_file <> None:
b0585923decc Uploaded
devteam
parents:
diff changeset
754 cl.append('R=%s' % ref_file_name)
b0585923decc Uploaded
devteam
parents:
diff changeset
755 stdouts,rval = pic.runPic(opts.jar,cl)
b0585923decc Uploaded
devteam
parents:
diff changeset
756 if opts.datatype == 'sam':
b0585923decc Uploaded
devteam
parents:
diff changeset
757 pic.delme.append(tempbam)
b0585923decc Uploaded
devteam
parents:
diff changeset
758 newsam = opts.output
b0585923decc Uploaded
devteam
parents:
diff changeset
759 outformat = 'bam'
b0585923decc Uploaded
devteam
parents:
diff changeset
760 pe = open(pic.metricsOut,'r').readlines()
b0585923decc Uploaded
devteam
parents:
diff changeset
761 pic.cleanSam(insam=sortedfile, newsam=newsam, picardErrors=pe,outformat=outformat)
b0585923decc Uploaded
devteam
parents:
diff changeset
762 pic.delme.append(sortedfile) # not wanted
b0585923decc Uploaded
devteam
parents:
diff changeset
763 stf.close()
b0585923decc Uploaded
devteam
parents:
diff changeset
764 pic.cleanup()
b0585923decc Uploaded
devteam
parents:
diff changeset
765
b0585923decc Uploaded
devteam
parents:
diff changeset
766 ####liubo added CleanSam tool####
b0585923decc Uploaded
devteam
parents:
diff changeset
767 elif pic.picname == 'CleanSam':
b0585923decc Uploaded
devteam
parents:
diff changeset
768 # input
b0585923decc Uploaded
devteam
parents:
diff changeset
769 cl.append('INPUT=%s' % opts.input)
b0585923decc Uploaded
devteam
parents:
diff changeset
770 # output
b0585923decc Uploaded
devteam
parents:
diff changeset
771 cl.append('OUTPUT=%s' % tempout)
b0585923decc Uploaded
devteam
parents:
diff changeset
772 stdouts,rval = pic.runPic(opts.jar, cl)
b0585923decc Uploaded
devteam
parents:
diff changeset
773 haveTempout = True
b0585923decc Uploaded
devteam
parents:
diff changeset
774
b0585923decc Uploaded
devteam
parents:
diff changeset
775 elif pic.picname == 'SortSam':
b0585923decc Uploaded
devteam
parents:
diff changeset
776 cl.append('I=%s' % opts.input)
b0585923decc Uploaded
devteam
parents:
diff changeset
777 cl.append('O=%s' % tempout)
b0585923decc Uploaded
devteam
parents:
diff changeset
778 cl.append('SORT_ORDER=%s' % opts.sortorder)
b0585923decc Uploaded
devteam
parents:
diff changeset
779 stdouts,rval = pic.runPic(opts.jar,cl)
b0585923decc Uploaded
devteam
parents:
diff changeset
780 haveTempout = True
b0585923decc Uploaded
devteam
parents:
diff changeset
781
b0585923decc Uploaded
devteam
parents:
diff changeset
782 elif pic.picname == 'BuildBamIndex':
b0585923decc Uploaded
devteam
parents:
diff changeset
783 cl.append('I=%s' % opts.input)
b0585923decc Uploaded
devteam
parents:
diff changeset
784 cl.append('O=%s' % tempout)
b0585923decc Uploaded
devteam
parents:
diff changeset
785 stdouts,rval = pic.runPic(opts.jar,cl)
b0585923decc Uploaded
devteam
parents:
diff changeset
786 haveTempout = True
b0585923decc Uploaded
devteam
parents:
diff changeset
787
b0585923decc Uploaded
devteam
parents:
diff changeset
788 elif pic.picname == 'SamFormatConverter':
b0585923decc Uploaded
devteam
parents:
diff changeset
789 cl.append('INPUT=%s' % opts.input)
b0585923decc Uploaded
devteam
parents:
diff changeset
790 cl.append('OUTPUT=%s' % tempout)
b0585923decc Uploaded
devteam
parents:
diff changeset
791 pic.runPic(opts.jar, cl)
b0585923decc Uploaded
devteam
parents:
diff changeset
792 haveTempout = True
b0585923decc Uploaded
devteam
parents:
diff changeset
793
b0585923decc Uploaded
devteam
parents:
diff changeset
794
b0585923decc Uploaded
devteam
parents:
diff changeset
795 else:
b0585923decc Uploaded
devteam
parents:
diff changeset
796 print >> sys.stderr,'picard.py got an unknown tool name - %s' % pic.picname
b0585923decc Uploaded
devteam
parents:
diff changeset
797 sys.exit(1)
b0585923decc Uploaded
devteam
parents:
diff changeset
798 if haveTempout:
b0585923decc Uploaded
devteam
parents:
diff changeset
799 # Some Picard tools produced a potentially intermediate bam file.
b0585923decc Uploaded
devteam
parents:
diff changeset
800 # Either just move to final location or create sam
b0585923decc Uploaded
devteam
parents:
diff changeset
801 if os.path.exists(tempout):
b0585923decc Uploaded
devteam
parents:
diff changeset
802 shutil.move(tempout, os.path.abspath(opts.output))
b0585923decc Uploaded
devteam
parents:
diff changeset
803 if opts.htmlout <> None or doFix: # return a pretty html page
b0585923decc Uploaded
devteam
parents:
diff changeset
804 pic.fixPicardOutputs(transpose=doTranspose,maxloglines=maxloglines)
b0585923decc Uploaded
devteam
parents:
diff changeset
805 if rval <> 0:
b0585923decc Uploaded
devteam
parents:
diff changeset
806 print >> sys.stderr, '## exit code=%d; stdout=%s' % (rval,stdouts)
b0585923decc Uploaded
devteam
parents:
diff changeset
807 # signal failure
b0585923decc Uploaded
devteam
parents:
diff changeset
808 if __name__=="__main__": __main__()