# HG changeset patch
# User fubar
# Date 1370305824 14400
# Node ID 42251cbdeeac3847e5438adce50b652b26e5ec36
Initial commit of test for FastQC with installation of the java stuff
diff -r 000000000000 -r 42251cbdeeac FastQC/rgFastQC.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/FastQC/rgFastQC.py Mon Jun 03 20:30:24 2013 -0400
@@ -0,0 +1,197 @@
+"""
+# May 2013 ross added check for bogus gz extension - fastqc gets confused
+# added sanitizer for user supplied name
+# removed shell and make cl a sequence for Popen call
+# ross lazarus August 10 2012 in response to anon insecurity report
+wrapper for fastqc
+
+called as
+
+ rgFastqc.py -i $input_file -d $html_file.files_path -o $html_file -n "$out_prefix"
+
+
+
+
+Current release seems overly intolerant of sam/bam header strangeness
+Author notified...
+
+
+"""
+import re
+import os
+import sys
+import subprocess
+import optparse
+import shutil
+import tempfile
+from rgutils import getFileString
+import zipfile
+import gzip
+
+class FastQC():
+ """wrapper
+ """
+
+
+ def __init__(self,opts=None):
+ assert opts <> None
+ self.opts = opts
+
+
+ def run_fastqc(self):
+ """
+ Run FastQC on the input file and pick up the report it writes.
+
+ Builds the command line from self.opts (executable, outputdir, informat,
+ contaminants), symlinks opts.input into outputdir under a sanitised copy of
+ opts.inputfilename so the report shows the history name, runs FastQC with
+ stdout and stderr sent to a temporary log file in outputdir, removes the
+ symlink, then looks for a subdirectory of outputdir whose name ends in
+ '_fastqc' and tries to read fastqc_report.html from it.
+
+ If the supplied name ends in .gz/.gzip, bz2 or .zip but the file cannot be
+ read as that format, the last extension is dropped from the link name so
+ FastQC does not try to decompress a file that is not compressed.
+
+ In batch mode fastqc behaves not very nicely - will write to a new folder in
+ the same place as the infile called [infilebasename]_fastqc
+ rlazarus@omics:/data/galaxy/test$ ls FC041_1_sequence_fastqc
+ duplication_levels.png fastqc_icon.png per_base_n_content.png per_sequence_gc_content.png summary.txt
+ error.png fastqc_report.html per_base_quality.png per_sequence_quality.png tick.png
+ fastqc_data.txt per_base_gc_content.png per_base_sequence_content.png sequence_length_distribution.png warning.png
+
+ NOTE(review): bz2 is used below but is not among this file's imports, and
+ the Python 2 bz2 module (this file uses Python 2 syntax) has no open();
+ the call sits outside the try, so the bz2 branch looks like it raises -
+ confirm and fix.
+ NOTE(review): the descriptor returned by mkstemp (dummy) is never closed.
+ NOTE(review): from the 'No output found' message onwards the text of this
+ method appears to have lost content (unterminated string literals; footer
+ and bodyindex are never assigned here), so the return value is not
+ documented. The __main__ block unpacks html, retval, serr from this call.
+ """
+ serr = ''
+ dummy,tlog = tempfile.mkstemp(prefix='rgFastQC',suffix=".log",dir=self.opts.outputdir)
+ sout = open(tlog, 'w')
+ fastq = os.path.basename(self.opts.input)
+ cl = [self.opts.executable,'--outdir=%s' % self.opts.outputdir]
+ if self.opts.informat in ['sam','bam']:
+ cl.append('--f=%s' % self.opts.informat)
+ if self.opts.contaminants <> None :
+ cl.append('--contaminants=%s' % self.opts.contaminants)
+ # patch suggested by bwlang https://bitbucket.org/galaxy/galaxy-central/pull-request/30
+ # use a symlink in a temporary directory so that the FastQC report reflects the history input file name
+ # note this exposes a bug in the EBI_SRA download tool which leaves bogus .gz extensions on uncompressed files
+ # which fastqc helpfully tries to uncompress again - hilarity ensues.
+ # patched may 29 2013 until this is fixed properly
+ infname = self.opts.inputfilename
+ linf = infname.lower()
+ trimext = False
+ if ( linf.endswith('.gz') or linf.endswith('.gzip') ):
+ f = gzip.open(self.opts.input)
+ try:
+ testrow = f.readline()
+ except:
+ trimext = True
+ f.close()
+ elif linf.endswith('bz2'):
+ f = bz2.open(self.opts.input,'rb')
+ try:
+ f.readline()
+ except:
+ trimext = True
+ f.close()
+ elif linf.endswith('.zip'):
+ if not zipfile.is_zipfile(self.opts.input):
+ trimext = True
+ if trimext:
+ infname = os.path.splitext(infname)[0]
+ fastqinfilename = re.sub(ur'[^a-zA-Z0-9_\-\.]', '_', os.path.basename(infname))
+ link_name = os.path.join(self.opts.outputdir, fastqinfilename)
+ os.symlink(self.opts.input, link_name)
+ cl.append(link_name)
+ sout.write('# FastQC cl = %s\n' % ' '.join(cl))
+ sout.flush()
+ p = subprocess.Popen(cl, shell=False, stderr=sout, stdout=sout, cwd=self.opts.outputdir)
+ retval = p.wait()
+ sout.close()
+ runlog = open(tlog,'r').readlines()
+ os.unlink(link_name)
+ flist = os.listdir(self.opts.outputdir) # fastqc plays games with its output directory name. eesh
+ odpath = None
+ for f in flist:
+ d = os.path.join(self.opts.outputdir,f)
+ if os.path.isdir(d):
+ if d.endswith('_fastqc'):
+ odpath = d
+ hpath = None
+ if odpath <> None:
+ try:
+ hpath = os.path.join(odpath,'fastqc_report.html')
+ rep = open(hpath,'r').readlines() # for our new html file but we need to insert our stuff after the
tag
+ except:
+ pass
+ if hpath == None:
+ serr = '\n'.join(runlog)
+ res = ['## odpath=%s: No output found in %s. Output for the run was:Files created by FastQC
\n']
+ flist.sort()
+ for i,f in enumerate(flist):
+ if not(os.path.isdir(f)):
+ fn = os.path.split(f)[-1]
+ res.append('%s |
\n' % (fn,getFileString(fn, self.opts.outputdir)))
+ res.append('
\n')
+ res.append('
FastQC documentation and full attribution is here
\n')
+ res.append('FastQC was run by Galaxy using the rgenetics rgFastQC wrapper - see http://rgenetics.org for details and licensing\n
')
+ res.append(footer)
+ fixed = rep[:bodyindex] + res + rep[bodyindex:]
+ return fixed # with our additions
+
+
+ def fix_fastqcimages(self,odpath):
+ """ Galaxy wants everything in the same files_dir
+ """
+ icpath = os.path.join(odpath,'Icons')
+ impath = os.path.join(odpath,'Images')
+ for adir in [icpath,impath,odpath]:
+ if os.path.exists(adir):
+ flist = os.listdir(adir) # get all files created
+ for f in flist:
+ if not os.path.isdir(os.path.join(adir,f)):
+ sauce = os.path.join(adir,f)
+ dest = os.path.join(self.opts.outputdir,f)
+ shutil.move(sauce,dest)
+ os.rmdir(adir)
+
+
+
+if __name__ == '__main__':
+ op = optparse.OptionParser()
+ op.add_option('-i', '--input', default=None)
+ op.add_option('-j', '--inputfilename', default=None)
+ op.add_option('-o', '--htmloutput', default=None)
+ op.add_option('-d', '--outputdir', default="/tmp/shortread")
+ op.add_option('-f', '--informat', default='fastq')
+ op.add_option('-n', '--namejob', default='rgFastQC')
+ op.add_option('-c', '--contaminants', default=None)
+ op.add_option('-e', '--executable', default='fastqc')
+ opts, args = op.parse_args()
+ assert opts.input <> None
+ assert os.path.isfile(opts.executable),'##rgFastQC.py error - cannot find executable %s' % opts.executable
+ if not os.path.exists(opts.outputdir):
+ os.makedirs(opts.outputdir)
+ f = FastQC(opts)
+ html,retval,serr = f.run_fastqc()
+ f = open(opts.htmloutput, 'w')
+ f.write(''.join(html))
+ f.close()
+ if retval <> 0:
+ print >> sys.stderr, serr # indicate failure
+
+
+
diff -r 000000000000 -r 42251cbdeeac FastQC/rgFastQC.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/FastQC/rgFastQC.xml Mon Jun 03 20:30:24 2013 -0400
@@ -0,0 +1,102 @@
+