Mercurial > repos > devteam > picard1106

--- a/picard_MeanQualityByCylce.xml	Wed Feb 26 00:25:02 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,88 +0,0 @@
-<tool name="MeanQualityByCycle" id="picard_MeanQualityByCycle" version="1.106.0">
-<description>Calculates mean quality</description>
-<requirements><requirement type="package" version="1.106.0">picard</requirement></requirements>
-  <command interpreter="bash">
-  	<!--	generic_outformat_wrapper.sh MeanQualityByCycle $outFile $outputFormat
- 			CHART_OUTPUT=File	A file (with .pdf extension) to write the chart to. Required.
-
-          #if str( $ALIGNED_READS_ONLY ):
-              ALIGNED_READS_ONLY="${ALIGNED_READS_ONLY}"
-          #end if
-          #if str( $PF_READS_ONLY ):
-              PF_READS_ONLY="${PF_READS_ONLY}"
-          #end if
-          #if str( $INPUT ):
-              INPUT="${INPUT}"
-          #end if
-          #if str( $OUTPUT ):
-              OUTPUT="${OUTPUT}"
-          #end if
-          #if str( $REFERENCE_SEQUENCE ):
-              REFERENCE_SEQUENCE="${REFERENCE_SEQUENCE}"
-          #end if
-          #if str( $ASSUME_SORTED ):
-              ASSUME_SORTED="${ASSUME_SORTED}"
-          #end if
-          #if str( $STOP_AFTER ):
-              STOP_AFTER="${STOP_AFTER}"
-          #end if
-
-      VALIDATION_STRINGENCY=LENIENT
-      QUIET=True
-      TMP_DIR="${__new_file_path__}" -->
-  </command>
-
-  <stdio>
-    <exit_code range="0" level="warning" description="Tool finished correctly" />
-  </stdio>
-
-  <inputs>
-   <!--
-      <param format="XXCHANGEMEEE" name="ALIGNED_READS_ONLY" type="boolean" label="If set to true, calculate mean quality over aligned reads only. Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false}" help="" />
-      <param format="XXCHANGEMEEE" name="PF_READS_ONLY" type="boolean" label="If set to true calculate mean quality over PF reads only. Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false}" help="" />
-      <param format="XXCHANGEMEEE" name="INPUT" type="data" label="Input SAM or BAM file. Required." help="" />
-      <param format="XXCHANGEMEEE" name="OUTPUT" type="data" label="File to write the output to. Required." help="" />
-      <param format="XXCHANGEMEEE" name="REFERENCE_SEQUENCE" type="data" label="Reference sequence fasta Default value: null." help="" />
-      <param format="XXCHANGEMEEE" name="ASSUME_SORTED" type="boolean" label="If true (default), then the sort order in the header file will be ignored. Default value: true. This option can be set to 'null' to clear the default value. Possible values: {true, false}" help="" />
-      <param format="XXCHANGEMEEE" name="STOP_AFTER" type="string" label="Stop after processing N reads, mainly for debugging. Default value: 0. This option can be set to 'null' to clear the default value." help="" />
-  -->
-  </inputs>
-  <outputs>
-  <!--
-    <data name="outFile" format="XXCHANGEMEEE">
-
-    </data>
-      -->
-  </outputs>
-  <tests>
-  <test>
-     <!-- Here is a command line that works:
-        java -jar ...
-     -->
-     <!--
-      <param name="inputFile" value="XXCHANGEMEE-input" />
-      <output name="outFile" file="XXCHANGEMEE-correct-output" lines_diff="2" ftype="XXCHANGEMEE" />
-        -->
-    </test>
-  </tests>
-  <help>
-Picard documentation says:
-
-
-MeanQualityByCycle
-
-Usage: program [options...]
-
-Option	Description
-CHART_OUTPUT=File	A file (with .pdf extension) to write the chart to. Required.
-ALIGNED_READS_ONLY=Boolean	If set to true, calculate mean quality over aligned reads only. Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false}
-PF_READS_ONLY=Boolean	If set to true calculate mean quality over PF reads only. Default value: false. This option can be set to 'null' to clear the default value. Possible values: {true, false}
-INPUT=File	Input SAM or BAM file. Required.
-OUTPUT=File	File to write the output to. Required.
-REFERENCE_SEQUENCE=File	Reference sequence fasta Default value: null.
-ASSUME_SORTED=Boolean	If true (default), then the sort order in the header file will be ignored. Default value: true. This option can be set to 'null' to clear the default value. Possible values: {true, false}
-STOP_AFTER=Long	Stop after processing N reads, mainly for debugging. Default value: 0. This option can be set to 'null' to clear the default value.
-
-
-  </help>
-</tool>
--- a/picard_wrapper.py	Wed Feb 26 00:25:02 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,823 +0,0 @@
-#!/usr/bin/env python
-"""
-Originally written by Kelly Vincent
-pretty output and additional picard wrappers by Ross Lazarus for rgenetics
-Runs all available wrapped Picard tools.
-usage: picard_wrapper.py [options]
-code Ross wrote licensed under the LGPL
-see http://www.gnu.org/copyleft/lesser.html
-"""
-
-import optparse, os, sys, subprocess, tempfile, shutil, time, logging
-
-galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-<meta name="generator" content="Galaxy %s tool output - see http://getgalaxy.org/" />
-<title></title>
-<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
-</head>
-<body>
-<div class="document">
-"""
-galhtmlattr = """Galaxy tool %s run at %s</b><br/>"""
-galhtmlpostfix = """</div></body></html>\n"""
-
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit()
-
-
-def timenow():
-    """return current time as a string
-"""
-    return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
-
-
-class PicardBase():
-    """
-simple base class with some utilities for Picard
-adapted and merged with Kelly Vincent's code april 2011 Ross
-lots of changes...
-"""
-
-    def __init__(self, opts=None,arg0=None):
-        """ common stuff needed at init for a picard tool
-"""
-        assert opts <> None, 'PicardBase needs opts at init'
-        self.opts = opts
-        if self.opts.outdir == None:
-             self.opts.outdir = os.getcwd() # fixmate has no html file eg so use temp dir
-        assert self.opts.outdir <> None,'## PicardBase needs a temp directory if no output directory passed in'
-        self.picname = self.baseName(opts.jar)
-        if self.picname.startswith('picard'):
-            self.picname = opts.picard_cmd # special case for some tools like replaceheader?
-        self.progname = self.baseName(arg0)
-        self.version = '0.002'
-        self.delme = [] # list of files to destroy
-        self.title = opts.title
-        self.inputfile = opts.input
-        try:
-            os.makedirs(opts.outdir)
-        except:
-            pass
-        try:
-            os.makedirs(opts.tmpdir)
-        except:
-            pass
-        self.log_filename = os.path.join(self.opts.outdir,'%s.log' % self.picname)
-        self.metricsOut = os.path.join(opts.outdir,'%s.metrics.txt' % self.picname)
-        self.setLogging(logfname=self.log_filename)
-
-    def baseName(self,name=None):
-        return os.path.splitext(os.path.basename(name))[0]
-
-    def setLogging(self,logfname="picard_wrapper.log"):
-        """setup a logger
-"""
-        logging.basicConfig(level=logging.INFO,
-                    filename=logfname,
-                    filemode='a')
-
-
-    def readLarge(self,fname=None):
-        """ read a potentially huge file.
-"""
-        try:
-            # get stderr, allowing for case where it's very large
-            tmp = open( fname, 'rb' )
-            s = ''
-            buffsize = 1048576
-            try:
-                while True:
-                    more = tmp.read( buffsize )
-                    if len(more) > 0:
-                        s += more
-                    else:
-                        break
-            except OverflowError:
-                pass
-            tmp.close()
-        except Exception, e:
-            stop_err( 'Read Large Exception : %s' % str( e ) )
-        return s
-
-    def runCL(self,cl=None,output_dir=None):
-        """ construct and run a command line
-we have galaxy's temp path as opt.temp_dir so don't really need isolation
-sometimes stdout is needed as the output - ugly hacks to deal with potentially vast artifacts
-"""
-        assert cl <> None, 'PicardBase runCL needs a command line as cl'
-        if output_dir == None:
-            output_dir = self.opts.outdir
-        if type(cl) == type([]):
-            cl = ' '.join(cl)
-        fd,templog = tempfile.mkstemp(dir=output_dir,suffix='rgtempRun.txt')
-        tlf = open(templog,'wb')
-        fd,temperr = tempfile.mkstemp(dir=output_dir,suffix='rgtempErr.txt')
-        tef = open(temperr,'wb')
-        process = subprocess.Popen(cl, shell=True, stderr=tef, stdout=tlf, cwd=output_dir)
-        rval = process.wait()
-        tlf.close()
-        tef.close()
-        stderrs = self.readLarge(temperr)
-        stdouts = self.readLarge(templog)
-        if rval > 0:
-            s = '## executing %s returned status %d and stderr: \n%s\n' % (cl,rval,stderrs)
-            stdouts = '%s\n%s' % (stdouts,stderrs)
-        else:
-            s = '## executing %s returned status %d and nothing on stderr\n' % (cl,rval)
-        logging.info(s)
-        os.unlink(templog) # always
-        os.unlink(temperr) # always
-        return s, stdouts, rval # sometimes s is an output
-
-    def runPic(self, jar, cl):
-        """
-cl should be everything after the jar file name in the command
-"""
-        runme = ['java -Xmx%s' % self.opts.maxjheap]
-        runme.append(" -Djava.io.tmpdir='%s' " % self.opts.tmpdir)
-        runme.append('-jar %s' % jar)
-        runme += cl
-
-        print runme,self.opts.outdir
-
-        s,stdouts,rval = self.runCL(cl=runme, output_dir=self.opts.outdir)
-        return stdouts,rval
-
-    def samToBam(self,infile=None,outdir=None):
-        """
-use samtools view to convert sam to bam
-"""
-        fd,tempbam = tempfile.mkstemp(dir=outdir,suffix='rgutilsTemp.bam')
-        cl = ['samtools view -h -b -S -o ',tempbam,infile]
-        tlog,stdouts,rval = self.runCL(cl,outdir)
-        return tlog,tempbam,rval
-
-    def sortSam(self, infile=None,outfile=None,outdir=None):
-        """
-"""
-        print '## sortSam got infile=%s,outfile=%s,outdir=%s' % (infile,outfile,outdir)
-        cl = ['samtools sort',infile,outfile]
-        tlog,stdouts,rval = self.runCL(cl,outdir)
-        return tlog
-
-    def cleanup(self):
-        for fname in self.delme:
-            try:
-                os.unlink(fname)
-            except:
-                pass
-
-    def prettyPicout(self,transpose,maxrows):
-        """organize picard outpouts into a report html page
-"""
-        res = []
-        try:
-            r = open(self.metricsOut,'r').readlines()
-        except:
-            r = []
-        if len(r) > 0:
-            res.append('<b>Picard on line resources</b><ul>\n')
-            res.append('<li><a href="http://picard.sourceforge.net/index.shtml">Click here for Picard Documentation</a></li>\n')
-            res.append('<li><a href="http://picard.sourceforge.net/picard-metric-definitions.shtml">Click here for Picard Metrics definitions</a></li></ul><hr/>\n')
-            if transpose:
-                res.append('<b>Picard output (transposed to make it easier to see)</b><hr/>\n')
-            else:
-                res.append('<b>Picard output</b><hr/>\n')
-            res.append('<table cellpadding="3" >\n')
-            dat = []
-            heads = []
-            lastr = len(r) - 1
-            # special case for estimate library complexity hist
-            thist = False
-            for i,row in enumerate(r):
-                if row.strip() > '':
-                    srow = row.split('\t')
-                    if row.startswith('#'):
-                        heads.append(row.strip()) # want strings
-                    else:
-                        dat.append(srow) # want lists
-                    if row.startswith('## HISTOGRAM'):
-                        thist = True
-            if len(heads) > 0:
-                hres = ['<tr class="d%d"><td colspan="2">%s</td></tr>' % (i % 2,x) for i,x in enumerate(heads)]
-                res += hres
-                heads = []
-            if len(dat) > 0:
-                if transpose and not thist:
-                    tdat = map(None,*dat) # transpose an arbitrary list of lists
-                    tdat = ['<tr class="d%d"><td>%s</td><td>%s&nbsp;</td></tr>\n' % ((i+len(heads)) % 2,x[0],x[1]) for i,x in enumerate(tdat)]
-                else:
-                    tdat = ['\t'.join(x).strip() for x in dat] # back to strings :(
-                    tdat = ['<tr class="d%d"><td colspan="2">%s</td></tr>\n' % ((i+len(heads)) % 2,x) for i,x in enumerate(tdat)]
-                res += tdat
-                dat = []
-            res.append('</table>\n')
-        return res
-
-    def fixPicardOutputs(self,transpose,maxloglines):
-        """
-picard produces long hard to read tab header files
-make them available but present them transposed for readability
-"""
-        logging.shutdown()
-        self.cleanup() # remove temp files stored in delme
-        rstyle="""<style type="text/css">
-tr.d0 td {background-color: oldlace; color: black;}
-tr.d1 td {background-color: aliceblue; color: black;}
-</style>"""
-        res = [rstyle,]
-        res.append(galhtmlprefix % self.progname)
-        res.append(galhtmlattr % (self.picname,timenow()))
-        flist = [x for x in os.listdir(self.opts.outdir) if not x.startswith('.')]
-        pdflist = [x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf']
-        if len(pdflist) > 0: # assumes all pdfs come with thumbnail .jpgs
-            for p in pdflist:
-                pbase = os.path.splitext(p)[0] # removes .pdf
-                imghref = '%s.jpg' % pbase
-                mimghref = '%s-0.jpg' % pbase # multiple pages pdf -> multiple thumbnails without asking!
-                if mimghref in flist:
-                    imghref=mimghref # only one for thumbnail...it's a multi page pdf
-                res.append('<table cellpadding="10"><tr><td>\n')
-                res.append('<a href="%s"><img src="%s" title="Click image preview for a print quality PDF version" hspace="10" align="middle"></a>\n' % (p,imghref))
-                res.append('</tr></td></table>\n')
-        if len(flist) > 0:
-            res.append('<b>The following output files were created (click the filename to view/download a copy):</b><hr/>')
-            res.append('<table>\n')
-            for i,f in enumerate(flist):
-                fn = os.path.split(f)[-1]
-                res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,fn))
-            res.append('</table><p/>\n')
-        pres = self.prettyPicout(transpose,maxloglines)
-        if len(pres) > 0:
-            res += pres
-        l = open(self.log_filename,'r').readlines()
-        llen = len(l)
-        if llen > 0:
-            res.append('<b>Picard Tool Run Log</b><hr/>\n')
-            rlog = ['<pre>',]
-            if llen > maxloglines:
-                n = min(50,int(maxloglines/2))
-                rlog += l[:n]
-                rlog.append('------------ ## %d rows deleted ## --------------\n' % (llen-maxloglines))
-                rlog += l[-n:]
-            else:
-                rlog += l
-            rlog.append('</pre>')
-            if llen > maxloglines:
-                rlog.append('\n<b>## WARNING - %d log lines truncated - <a href="%s">%s</a> contains entire output</b>' % (llen - maxloglines,self.log_filename,self.log_filename))
-            res += rlog
-        else:
-            res.append("### Odd, Picard left no log file %s - must have really barfed badly?\n" % self.log_filename)
-        res.append('<hr/>The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> \n')
-        res.append( 'generated all outputs reported here running as a <a href="http://getgalaxy.org">Galaxy</a> tool')
-        res.append(galhtmlpostfix)
-        outf = open(self.opts.htmlout,'w')
-        outf.write(''.join(res))
-        outf.write('\n')
-        outf.close()
-
-    def makePicInterval(self,inbed=None,outf=None):
-        """
-picard wants bait and target files to have the same header length as the incoming bam/sam
-a meaningful (ie accurate) representation will fail because of this - so this hack
-it would be far better to be able to supply the original bed untouched
-Additional checking added Ross Lazarus Dec 2011 to deal with two 'bug' reports on the list
-"""
-        assert inbed <> None
-        bed = open(inbed,'r').readlines()
-        sbed = [x.split('\t') for x in bed] # lengths MUST be 5
-        lens = [len(x) for x in sbed]
-        strands = [x[3] for x in sbed if not x[3] in ['+','-']]
-        maxl = max(lens)
-        minl = min(lens)
-        e = []
-        if maxl <> minl:
-            e.append("## Input error: Inconsistent field count in %s - please read the documentation on bait/target format requirements, fix and try again" % inbed)
-        if maxl <> 5:
-            e.append("## Input error: %d fields found in %s, 5 required - please read the warning and documentation on bait/target format requirements, fix and try again" % (maxl,inbed))
-        if len(strands) > 0:
-            e.append("## Input error: Fourth column in %s is not the required strand (+ or -) - please read the warning and documentation on bait/target format requirements, fix and try again" % (inbed))
-        if len(e) > 0: # write to stderr and quit
-            print >> sys.stderr, '\n'.join(e)
-            sys.exit(1)
-        thead = os.path.join(self.opts.outdir,'tempSamHead.txt')
-        if self.opts.datatype == 'sam':
-            cl = ['samtools view -H -S',self.opts.input,'>',thead]
-        else:
-            cl = ['samtools view -H',self.opts.input,'>',thead]
-        self.runCL(cl=cl,output_dir=self.opts.outdir)
-        head = open(thead,'r').readlines()
-        s = '## got %d rows of header\n' % (len(head))
-        logging.info(s)
-        o = open(outf,'w')
-        o.write(''.join(head))
-        o.write(''.join(bed))
-        o.close()
-        return outf
-
-    def cleanSam(self, insam=None, newsam=None, picardErrors=[],outformat=None):
-        """
-interesting problem - if paired, must remove mate pair of errors too or we have a new set of errors after cleaning - missing mate pairs!
-Do the work of removing all the error sequences
-pysam is cool
-infile = pysam.Samfile( "-", "r" )
-outfile = pysam.Samfile( "-", "w", template = infile )
-for s in infile: outfile.write(s)
-
-errors from ValidateSameFile.jar look like
-WARNING: Record 32, Read name SRR006041.1202260, NM tag (nucleotide differences) is missing
-ERROR: Record 33, Read name SRR006041.1042721, Empty sequence dictionary.
-ERROR: Record 33, Read name SRR006041.1042721, RG ID on SAMRecord not found in header: SRR006041
-
-"""
-        assert os.path.isfile(insam), 'rgPicardValidate cleansam needs an input sam file - cannot find %s' % insam
-        assert newsam <> None, 'rgPicardValidate cleansam needs an output new sam file path'
-        removeNames = [x.split(',')[1].replace(' Read name ','') for x in picardErrors if len(x.split(',')) > 2]
-        remDict = dict(zip(removeNames,range(len(removeNames))))
-        infile = pysam.Samfile(insam,'rb')
-        info = 'found %d error sequences in picardErrors, %d unique' % (len(removeNames),len(remDict))
-        if len(removeNames) > 0:
-            outfile = pysam.Samfile(newsam,'wb',template=infile) # template must be an open file
-            i = 0
-            j = 0
-            for row in infile:
-                dropme = remDict.get(row.qname,None) # keep if None
-                if not dropme:
-                    outfile.write(row)
-                    j += 1
-                else: # discard
-                    i += 1
-            info = '%s\n%s' % (info, 'Discarded %d lines writing %d to %s from %s' % (i,j,newsam,insam))
-            outfile.close()
-            infile.close()
-        else: # we really want a nullop or a simple pointer copy
-            infile.close()
-            if newsam:
-                shutil.copy(insam,newsam)
-        logging.info(info)
-
-
-
-def __main__():
-    doFix = False # tools returning htmlfile don't need this
-    doTranspose = True # default
-    maxloglines = 100 # default
-    #Parse Command Line
-    op = optparse.OptionParser()
-    # All tools
-    op.add_option('-i', '--input', dest='input', help='Input SAM or BAM file' )
-    op.add_option('-e', '--inputext', default=None)
-    op.add_option('-o', '--output', default=None)
-    op.add_option('-n', '--title', default="Pick a Picard Tool")
-    op.add_option('-t', '--htmlout', default=None)
-    op.add_option('-d', '--outdir', default=None)
-    op.add_option('-x', '--maxjheap', default='3000m')
-    op.add_option('-b', '--bisulphite', default='false')
-    op.add_option('-s', '--sortorder', default='query')
-    op.add_option('','--tmpdir', default='/tmp')
-    op.add_option('-j','--jar',default='')
-    op.add_option('','--picard-cmd',default=None)
-    # Many tools
-    op.add_option( '', '--output-format', dest='output_format', help='Output format' )
-    op.add_option( '', '--bai-file', dest='bai_file', help='The path to the index file for the input bam file' )
-    op.add_option( '', '--ref', dest='ref', help='Built-in reference with fasta and dict file', default=None )
-    # CreateSequenceDictionary
-    op.add_option( '', '--ref-file', dest='ref_file', help='Fasta to use as reference', default=None )
-    op.add_option( '', '--species-name', dest='species_name', help='Species name to use in creating dict file from fasta file' )
-    op.add_option( '', '--build-name', dest='build_name', help='Name of genome assembly to use in creating dict file from fasta file' )
-    op.add_option( '', '--trunc-names', dest='trunc_names', help='Truncate sequence names at first whitespace from fasta file' )
-    # MarkDuplicates
-    op.add_option( '', '--remdups', default='true', help='Remove duplicates from output file' )
-    op.add_option( '', '--optdupdist', default="100", help='Maximum pixels between two identical sequences in order to consider them optical duplicates.' )
-    # CollectInsertSizeMetrics
-    op.add_option('', '--taillimit', default="0")
-    op.add_option('', '--histwidth', default="0")
-    op.add_option('', '--minpct', default="0.01")
-    op.add_option('', '--malevel', default='')
-    op.add_option('', '--deviations', default="0.0")
-    # CollectAlignmentSummaryMetrics
-    op.add_option('', '--maxinsert', default="20")
-    op.add_option('', '--adaptors', default='')
-    # FixMateInformation and validate
-    # CollectGcBiasMetrics
-    op.add_option('', '--windowsize', default='100')
-    op.add_option('', '--mingenomefrac', default='0.00001')
-    # AddOrReplaceReadGroups
-    op.add_option( '', '--rg-opts', dest='rg_opts', help='Specify extra (optional) arguments with full, otherwise preSet' )
-    op.add_option( '', '--rg-lb', dest='rg_library', help='Read Group Library' )
-    op.add_option( '', '--rg-pl', dest='rg_platform', help='Read Group platform (e.g. illumina, solid)' )
-    op.add_option( '', '--rg-pu', dest='rg_plat_unit', help='Read Group platform unit (eg. run barcode) ' )
-    op.add_option( '', '--rg-sm', dest='rg_sample', help='Read Group sample name' )
-    op.add_option( '', '--rg-id', dest='rg_id', help='Read Group ID' )
-    op.add_option( '', '--rg-cn', dest='rg_seq_center', help='Read Group sequencing center name' )
-    op.add_option( '', '--rg-ds', dest='rg_desc', help='Read Group description' )
-    # ReorderSam
-    op.add_option( '', '--allow-inc-dict-concord', dest='allow_inc_dict_concord', help='Allow incomplete dict concordance' )
-    op.add_option( '', '--allow-contig-len-discord', dest='allow_contig_len_discord', help='Allow contig length discordance' )
-    # ReplaceSamHeader
-    op.add_option( '', '--header-file', dest='header_file', help='sam or bam file from which header will be read' )
-
-    op.add_option('','--assumesorted', default='true')
-    op.add_option('','--readregex', default="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*")
-    #estimatelibrarycomplexity
-    op.add_option('','--minid', default="5")
-    op.add_option('','--maxdiff', default="0.03")
-    op.add_option('','--minmeanq', default="20")
-    #hsmetrics
-    op.add_option('','--baitbed', default=None)
-    op.add_option('','--targetbed', default=None)
-    #validate
-    op.add_option('','--ignoreflags', action='append', type="string")
-    op.add_option('','--maxerrors', default=None)
-    op.add_option('','--datatype', default=None)
-    op.add_option('','--bamout', default=None)
-    op.add_option('','--samout', default=None)
-    #downsample
-    op.add_option('','--probability', default="1")
-    op.add_option('','--seed', default="1")
-
-    opts, args = op.parse_args()
-    opts.sortme = opts.assumesorted == 'false'
-    assert opts.input <> None
-    # need to add
-    # instance that does all the work
-    pic = PicardBase(opts,sys.argv[0])
-
-    tmp_dir = opts.outdir
-    haveTempout = False # we use this where sam output is an option
-    rval = 0
-    stdouts = 'Not run yet'
-    # set ref and dict files to use (create if necessary)
-    ref_file_name = opts.ref
-    if opts.ref_file <> None:
-        csd = 'CreateSequenceDictionary'
-        realjarpath = os.path.split(opts.jar)[0]
-        jarpath = os.path.join(realjarpath,'%s.jar' % csd) # for refseq
-        tmp_ref_fd, tmp_ref_name = tempfile.mkstemp( dir=opts.tmpdir , prefix = pic.picname)
-        ref_file_name = '%s.fasta' % tmp_ref_name
-        # build dict
-        dict_file_name = '%s.dict' % tmp_ref_name
-        os.symlink( opts.ref_file, ref_file_name )
-        cl = ['REFERENCE=%s' % ref_file_name]
-        cl.append('OUTPUT=%s' % dict_file_name)
-        cl.append('URI=%s' % os.path.basename( opts.ref_file ))
-        cl.append('TRUNCATE_NAMES_AT_WHITESPACE=%s' % opts.trunc_names)
-        if opts.species_name:
-            cl.append('SPECIES=%s' % opts.species_name)
-        if opts.build_name:
-            cl.append('GENOME_ASSEMBLY=%s' % opts.build_name)
-        pic.delme.append(dict_file_name)
-        pic.delme.append(ref_file_name)
-        pic.delme.append(tmp_ref_name)
-        stdouts,rval = pic.runPic(jarpath, cl)
-        # run relevant command(s)
-
-    # define temporary output
-    # if output is sam, it must have that extension, otherwise bam will be produced
-    # specify sam or bam file with extension
-    if opts.output_format == 'sam':
-        suff = '.sam'
-    else:
-        suff = ''
-    tmp_fd, tempout = tempfile.mkstemp( dir=opts.tmpdir, suffix=suff )
-
-    cl = ['VALIDATION_STRINGENCY=LENIENT',]
-
-    if pic.picname == 'AddOrReplaceReadGroups':
-        # sort order to match Galaxy's default
-        cl.append('SORT_ORDER=coordinate')
-        # input
-        cl.append('INPUT=%s' % opts.input)
-        # outputs
-        cl.append('OUTPUT=%s' % tempout)
-        # required read groups
-        cl.append('RGLB="%s"' % opts.rg_library)
-        cl.append('RGPL="%s"' % opts.rg_platform)
-        cl.append('RGPU="%s"' % opts.rg_plat_unit)
-        cl.append('RGSM="%s"' % opts.rg_sample)
-        if opts.rg_id:
-            cl.append('RGID="%s"' % opts.rg_id)
-        # optional read groups
-        if opts.rg_seq_center:
-            cl.append('RGCN="%s"' % opts.rg_seq_center)
-        if opts.rg_desc:
-            cl.append('RGDS="%s"' % opts.rg_desc)
-        stdouts,rval = pic.runPic(opts.jar, cl)
-        haveTempout = True
-
-    elif pic.picname == 'BamIndexStats':
-        tmp_fd, tmp_name = tempfile.mkstemp( dir=tmp_dir )
-        tmp_bam_name = '%s.bam' % tmp_name
-        tmp_bai_name = '%s.bai' % tmp_bam_name
-        os.symlink( opts.input, tmp_bam_name )
-        os.symlink( opts.bai_file, tmp_bai_name )
-        cl.append('INPUT=%s' % ( tmp_bam_name ))
-        pic.delme.append(tmp_bam_name)
-        pic.delme.append(tmp_bai_name)
-        pic.delme.append(tmp_name)
-        stdouts,rval = pic.runPic( opts.jar, cl )
-        f = open(pic.metricsOut,'a')
-        f.write(stdouts) # got this on stdout from runCl
-        f.write('\n')
-        f.close()
-        doTranspose = False # but not transposed
-
-    elif pic.picname == 'EstimateLibraryComplexity':
-        cl.append('I=%s' % opts.input)
-        cl.append('O=%s' % pic.metricsOut)
-        if float(opts.minid) > 0:
-            cl.append('MIN_IDENTICAL_BASES=%s' % opts.minid)
-        if float(opts.maxdiff) > 0.0:
-            cl.append('MAX_DIFF_RATE=%s' % opts.maxdiff)
-        if float(opts.minmeanq) > 0:
-            cl.append('MIN_MEAN_QUALITY=%s' % opts.minmeanq)
-        if opts.readregex > '':
-            cl.append('READ_NAME_REGEX="%s"' % opts.readregex)
-        if float(opts.optdupdist) > 0:
-            cl.append('OPTICAL_DUPLICATE_PIXEL_DISTANCE=%s' % opts.optdupdist)
-        stdouts,rval = pic.runPic(opts.jar, cl)
-
-    elif pic.picname == 'CollectAlignmentSummaryMetrics':
-        # Why do we do this fakefasta thing?
-        # Because we need NO fai to be available or picard barfs unless it matches the input data.
-        # why? Dunno Seems to work without complaining if the .bai file is AWOL....
-        fakefasta = os.path.join(opts.outdir,'%s_fake.fasta' % os.path.basename(ref_file_name))
-        try:
-            os.symlink(ref_file_name,fakefasta)
-        except:
-            s = '## unable to symlink %s to %s - different devices? Will shutil.copy'
-            info = s
-            shutil.copy(ref_file_name,fakefasta)
-        pic.delme.append(fakefasta)
-        cl.append('ASSUME_SORTED=true')
-        adaptlist = opts.adaptors.split(',')
-        adaptorseqs = ['ADAPTER_SEQUENCE=%s' % x for x in adaptlist]
-        cl += adaptorseqs
-        cl.append('IS_BISULFITE_SEQUENCED=%s' % opts.bisulphite)
-        cl.append('MAX_INSERT_SIZE=%s' % opts.maxinsert)
-        cl.append('OUTPUT=%s' % pic.metricsOut)
-        cl.append('R=%s' % fakefasta)
-        cl.append('TMP_DIR=%s' % opts.tmpdir)
-        if not opts.assumesorted.lower() == 'true': # we need to sort input
-            sortedfile = '%s.sorted' % os.path.basename(opts.input)
-            if opts.datatype == 'sam': # need to work with a bam
-                tlog,tempbam,trval = pic.samToBam(opts.input,opts.outdir)
-                pic.delme.append(tempbam)
-                try:
-                    tlog = pic.sortSam(tempbam,sortedfile,opts.outdir)
-                except:
-                    print '## exception on sorting sam file %s' % opts.input
-            else: # is already bam
-                try:
-                    tlog = pic.sortSam(opts.input,sortedfile,opts.outdir)
-                except : # bug - [bam_sort_core] not being ignored - TODO fixme
-                    print '## exception %s on sorting bam file %s' % (sys.exc_info()[0],opts.input)
-            cl.append('INPUT=%s.bam' % os.path.abspath(os.path.join(opts.outdir,sortedfile)))
-            pic.delme.append(os.path.join(opts.outdir,sortedfile))
-        else:
-            cl.append('INPUT=%s' % os.path.abspath(opts.input))
-        stdouts,rval = pic.runPic(opts.jar, cl)
-
-
-    elif pic.picname == 'CollectGcBiasMetrics':
-        assert os.path.isfile(ref_file_name),'PicardGC needs a reference sequence - cannot read %s' % ref_file_name
-        # sigh. Why do we do this fakefasta thing? Because we need NO fai to be available or picard barfs unless it has the same length as the input data.
-        # why? Dunno
-        fakefasta = os.path.join(opts.outdir,'%s_fake.fasta' % os.path.basename(ref_file_name))
-        try:
-            os.symlink(ref_file_name,fakefasta)
-        except:
-            s = '## unable to symlink %s to %s - different devices? May need to replace with shutil.copy'
-            info = s
-            shutil.copy(ref_file_name,fakefasta)
-        pic.delme.append(fakefasta)
-        x = 'rgPicardGCBiasMetrics'
-        pdfname = '%s.pdf' % x
-        jpgname = '%s.jpg' % x
-        tempout = os.path.join(opts.outdir,'rgPicardGCBiasMetrics.out')
-        temppdf = os.path.join(opts.outdir,pdfname)
-        cl.append('R=%s' % fakefasta)
-        cl.append('WINDOW_SIZE=%s' % opts.windowsize)
-        cl.append('MINIMUM_GENOME_FRACTION=%s' % opts.mingenomefrac)
-        cl.append('INPUT=%s' % opts.input)
-        cl.append('OUTPUT=%s' % tempout)
-        cl.append('TMP_DIR=%s' % opts.tmpdir)
-        cl.append('CHART_OUTPUT=%s' % temppdf)
-        cl.append('SUMMARY_OUTPUT=%s' % pic.metricsOut)
-        stdouts,rval = pic.runPic(opts.jar, cl)
-        if os.path.isfile(temppdf):
-            cl2 = ['convert','-resize x400',temppdf,os.path.join(opts.outdir,jpgname)] # make the jpg for fixPicardOutputs to find
-            s,stdouts,rval = pic.runCL(cl=cl2,output_dir=opts.outdir)
-        else:
-            s='### runGC: Unable to find pdf %s - please check the log for the causal problem\n' % temppdf
-        lf = open(pic.log_filename,'a')
-        lf.write(s)
-        lf.write('\n')
-        lf.close()
-
-    elif pic.picname == 'CollectInsertSizeMetrics':
-        """ <command interpreter="python">
-picard_wrapper.py -i "$input_file" -n "$out_prefix" --tmpdir "${__new_file_path__}" --deviations "$deviations"
---histwidth "$histWidth" --minpct "$minPct" --malevel "$malevel"
--j "${GALAXY_DATA_INDEX_DIR}/shared/jars/picard/CollectInsertSizeMetrics.jar" -d "$html_file.files_path" -t "$html_file"
-</command>
-"""
-        isPDF = 'InsertSizeHist.pdf'
-        pdfpath = os.path.join(opts.outdir,isPDF)
-        histpdf = 'InsertSizeHist.pdf'
-        cl.append('I=%s' % opts.input)
-        cl.append('O=%s' % pic.metricsOut)
-        cl.append('HISTOGRAM_FILE=%s' % histpdf)
-        #if opts.taillimit <> '0': # this was deprecated although still mentioned in the docs at 1.56
-        # cl.append('TAIL_LIMIT=%s' % opts.taillimit)
-        if opts.histwidth <> '0':
-            cl.append('HISTOGRAM_WIDTH=%s' % opts.histwidth)
-        if float( opts.minpct) > 0.0:
-            cl.append('MINIMUM_PCT=%s' % opts.minpct)
-        if float(opts.deviations) > 0.0:
-            cl.append('DEVIATIONS=%s' % opts.deviations)
-        if opts.malevel:
-            malists = opts.malevel.split(',')
-            malist = ['METRIC_ACCUMULATION_LEVEL=%s' % x for x in malists]
-            cl += malist
-        stdouts,rval = pic.runPic(opts.jar, cl)
-        if os.path.exists(pdfpath): # automake thumbnail - will be added to html
-            cl2 = ['mogrify', '-format jpg -resize x400 %s' % pdfpath]
-            pic.runCL(cl=cl2,output_dir=opts.outdir)
-        else:
-            s = 'Unable to find expected pdf file %s<br/>\n' % pdfpath
-            s += 'This <b>always happens if single ended data was provided</b> to this tool,\n'
-            s += 'so please double check that your input data really is paired-end NGS data.<br/>\n'
-            s += 'If your input was paired data this may be a bug worth reporting to the galaxy-bugs list\n<br/>'
-            logging.info(s)
-        if len(stdouts) > 0:
-           logging.info(stdouts)
-
-    elif pic.picname == 'MarkDuplicates':
-        # assume sorted even if header says otherwise
-        cl.append('ASSUME_SORTED=%s' % (opts.assumesorted))
-        # input
-        cl.append('INPUT=%s' % opts.input)
-        # outputs
-        cl.append('OUTPUT=%s' % opts.output)
-        cl.append('METRICS_FILE=%s' % pic.metricsOut )
-        # remove or mark duplicates
-        cl.append('REMOVE_DUPLICATES=%s' % opts.remdups)
-        # the regular expression to be used to parse reads in incoming SAM file
-        cl.append('READ_NAME_REGEX="%s"' % opts.readregex)
-        # maximum offset between two duplicate clusters
-        cl.append('OPTICAL_DUPLICATE_PIXEL_DISTANCE=%s' % opts.optdupdist)
-        stdouts,rval = pic.runPic(opts.jar, cl)
-
-    elif pic.picname == 'FixMateInformation':
-        cl.append('I=%s' % opts.input)
-        cl.append('O=%s' % tempout)
-        cl.append('SORT_ORDER=%s' % opts.sortorder)
-        stdouts,rval = pic.runPic(opts.jar,cl)
-        haveTempout = True
-
-    elif pic.picname == 'ReorderSam':
-        # input
-        cl.append('INPUT=%s' % opts.input)
-        # output
-        cl.append('OUTPUT=%s' % tempout)
-        # reference
-        cl.append('REFERENCE=%s' % ref_file_name)
-        # incomplete dict concordance
-        if opts.allow_inc_dict_concord == 'true':
-            cl.append('ALLOW_INCOMPLETE_DICT_CONCORDANCE=true')
-        # contig length discordance
-        if opts.allow_contig_len_discord == 'true':
-            cl.append('ALLOW_CONTIG_LENGTH_DISCORDANCE=true')
-        stdouts,rval = pic.runPic(opts.jar, cl)
-        haveTempout = True
-
-    elif pic.picname == 'ReplaceSamHeader':
-        cl.append('INPUT=%s' % opts.input)
-        cl.append('OUTPUT=%s' % tempout)
-        cl.append('HEADER=%s' % opts.header_file)
-        stdouts,rval = pic.runPic(opts.jar, cl)
-        haveTempout = True
-
-    elif pic.picname == 'CalculateHsMetrics':
-        maxloglines = 100
-        baitfname = os.path.join(opts.outdir,'rgPicardHsMetrics.bait')
-        targetfname = os.path.join(opts.outdir,'rgPicardHsMetrics.target')
-        baitf = pic.makePicInterval(opts.baitbed,baitfname)
-        if opts.targetbed == opts.baitbed: # same file sometimes
-            targetf = baitf
-        else:
-            targetf = pic.makePicInterval(opts.targetbed,targetfname)
-        cl.append('BAIT_INTERVALS=%s' % baitf)
-        cl.append('TARGET_INTERVALS=%s' % targetf)
-        cl.append('INPUT=%s' % os.path.abspath(opts.input))
-        cl.append('OUTPUT=%s' % pic.metricsOut)
-        cl.append('TMP_DIR=%s' % opts.tmpdir)
-        stdouts,rval = pic.runPic(opts.jar,cl)
-
-    elif pic.picname == 'ValidateSamFile':
-        import pysam
-        doTranspose = False
-        sortedfile = os.path.join(opts.outdir,'rgValidate.sorted')
-        stf = open(pic.log_filename,'w')
-        tlog = None
-        if opts.datatype == 'sam': # need to work with a bam
-            tlog,tempbam,rval = pic.samToBam(opts.input,opts.outdir)
-            try:
-                tlog = pic.sortSam(tempbam,sortedfile,opts.outdir)
-            except:
-                print '## exception on sorting sam file %s' % opts.input
-        else: # is already bam
-            try:
-                tlog = pic.sortSam(opts.input,sortedfile,opts.outdir)
-            except: # bug - [bam_sort_core] not being ignored - TODO fixme
-                print '## exception on sorting bam file %s' % opts.input
-        if tlog:
-            print '##tlog=',tlog
-            stf.write(tlog)
-            stf.write('\n')
-        sortedfile = '%s.bam' % sortedfile # samtools does that
-        cl.append('O=%s' % pic.metricsOut)
-        cl.append('TMP_DIR=%s' % opts.tmpdir)
-        cl.append('I=%s' % sortedfile)
-        opts.maxerrors = '99999999'
-        cl.append('MAX_OUTPUT=%s' % opts.maxerrors)
-        if opts.ignoreflags[0] <> 'None': # picard error values to ignore
-            igs = ['IGNORE=%s' % x for x in opts.ignoreflags if x <> 'None']
-            cl.append(' '.join(igs))
-        if opts.bisulphite.lower() <> 'false':
-            cl.append('IS_BISULFITE_SEQUENCED=true')
-        if opts.ref <> None or opts.ref_file <> None:
-            cl.append('R=%s' % ref_file_name)
-        stdouts,rval = pic.runPic(opts.jar,cl)
-        if opts.datatype == 'sam':
-            pic.delme.append(tempbam)
-        newsam = opts.output
-        outformat = 'bam'
-        pe = open(pic.metricsOut,'r').readlines()
-        pic.cleanSam(insam=sortedfile, newsam=newsam, picardErrors=pe,outformat=outformat)
-        pic.delme.append(sortedfile) # not wanted
-        stf.close()
-        pic.cleanup()
-
-####liubo added CleanSam tool####
-    elif pic.picname == 'CleanSam':
-        # input
-        cl.append('INPUT=%s' % opts.input)
-        # output
-        cl.append('OUTPUT=%s' % tempout)
-        stdouts,rval = pic.runPic(opts.jar, cl)
-        haveTempout = True
-
-    elif pic.picname == 'SortSam':
-        cl.append('I=%s' % opts.input)
-        cl.append('O=%s' % tempout)
-        cl.append('SORT_ORDER=%s' % opts.sortorder)
-        stdouts,rval = pic.runPic(opts.jar,cl)
-        haveTempout = True
-
-    elif pic.picname == 'BuildBamIndex':
-        cl.append('I=%s' % opts.input)
-        cl.append('O=%s' % tempout)
-        stdouts,rval = pic.runPic(opts.jar,cl)
-        haveTempout = True
-
-    elif pic.picname == 'SamFormatConverter':
-        cl.append('INPUT=%s' % opts.input)
-        cl.append('OUTPUT=%s' % tempout)
-        pic.runPic(opts.jar, cl)
-        haveTempout = True
-    elif pic.picname == "DownsampleSam":
-        cl.append('I=%s' % opts.input)
-        mystring = opts.output
-        mystringsam = mystring + ".sam"
-        cl.append('O=%s' % mystringsam)
-        if float(opts.probability) > 0:
-            cl.append('PROBABILITY=%s' % opts.probability)
-        if float(opts.seed) > 0:
-            cl.append('RANDOM_SEED=%s' % opts.seed)
-        stdouts,rval = pic.runPic(opts.jar, cl)
-        myoutput = mystringsam.replace(".sam", "")
-        os.rename(mystringsam,myoutput)
-
-
-    else:
-        print >> sys.stderr,'picard.py got an unknown tool name - %s' % pic.picname
-        sys.exit(1)
-    if haveTempout:
-        # Some Picard tools produced a potentially intermediate bam file.
-        # Either just move to final location or create sam
-        if os.path.exists(tempout):
-            shutil.move(tempout, os.path.abspath(opts.output))
-    if opts.htmlout <> None or doFix: # return a pretty html page
-        pic.fixPicardOutputs(transpose=doTranspose,maxloglines=maxloglines)
-    if rval <> 0:
-        print >> sys.stderr, '## exit code=%d; stdout=%s' % (rval,stdouts)
-        # signal failure
-if __name__=="__main__": __main__()