Mercurial > repos > hackdna > fastqc
comparison fastqc_checker.py @ 0:d8d131d08779 draft default tip
Initial upload.
| author | hackdna |
|---|---|
| date | Tue, 21 May 2013 11:48:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d8d131d08779 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 ''' | |
| 4 FastQC checker for Galaxy biomedical data analysis platform | |
| 5 | |
| 6 @author: Ilya Sytchev | |
| 7 | |
| 8 Input: one or more files in fastq format | |
| 9 Output: sequencing quality report in text format | |
| 10 | |
| 11 Requires FastQC 0.10.0 (http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/) | |
| 12 | |
| 13 Partially based on: | |
| 14 fastqcwrapper (http://toolshed.g2.bx.psu.edu/repos/jjohnson/fastqc) | |
| 15 rgFastQC (https://bitbucket.org/galaxy/galaxy-dist/src/tip/tools/rgenetics/rgFastQC.py) | |
| 16 | |
| 17 Tested with Python 2.6.1 and 2.7.2 on Mac OS 10.6.8 | |
| 18 ''' | |
| 19 | |
| 20 import sys, os, optparse, tempfile, shutil, subprocess | |
| 21 | |
def stop_err(msg, returncode=1):
    '''
    Report a fatal error and terminate the program.

    msg        -- text written verbatim to standard error (no newline is added)
    returncode -- process exit status to terminate with (default: 1)

    Never returns: always raises SystemExit carrying returncode.
    '''
    sys.stderr.write(msg)
    raise SystemExit(returncode)
| 25 | |
def _summary_location(report_dir, infile):
    '''
    Return (dataset name, expected summary.txt path) for one input file.

    FastQC removes a trailing .fastq extension from the input file name
    before using it to name its report directory; any other extension is
    kept as part of the directory name.
    '''
    filename = os.path.basename(infile)
    (datasetname, extension) = os.path.splitext(filename)
    if extension == '.fastq':
        report_name = datasetname
    else:
        report_name = filename
    summaryfilename = os.path.join(report_dir, report_name + '_fastqc', 'summary.txt')
    return datasetname, summaryfilename


def _run_fastqc(cmd, workdir):
    '''
    Run the FastQC command line, capturing its stdout/stderr in workdir.

    The capture files are kept open for the duration of the run (rather than
    being created, dropped and re-opened by name) and are removed when closed.
    Terminates the program through stop_err if FastQC cannot be started or
    exits with a non-zero status.
    '''
    with tempfile.NamedTemporaryFile(dir=workdir, suffix='.err') as tmp_stderr:
        with tempfile.NamedTemporaryFile(dir=workdir, suffix='.out') as tmp_stdout:
            try:
                subprocess.check_call(cmd, stderr=tmp_stderr.fileno(), stdout=tmp_stdout.fileno())
            except subprocess.CalledProcessError as e:
                stop_err("Error executing FastQC\n", e.returncode)
            except OSError as e:
                # e.g. the file exists but is not executable
                stop_err("Error executing FastQC: %s\n" % e)


def _write_report(outfile, report_dir, infiles):
    '''
    Write the first two tab-separated columns of every FastQC summary.txt
    (one per input file) to the open file object outfile.

    A note is written instead when the summary for an input file is missing.
    '''
    for f in infiles:
        (datasetname, summaryfilename) = _summary_location(report_dir, f)
        outfile.write("%s results:\n" % datasetname)
        if os.path.isfile(summaryfilename):
            with open(summaryfilename, 'r') as summaryfile:
                for line in summaryfile:
                    fields = line.rstrip('\r\n').split('\t')
                    if len(fields) < 2:
                        continue  # blank or malformed line: nothing to report
                    outfile.write(fields[0] + '\t' + fields[1] + '\n')
        else:
            outfile.write("FastQC summary report was not found at %s.\n" % summaryfilename)
        outfile.write("\n")


def __main__():
    '''
    Command-line entry point.

    Parses the options (-e FastQC executable, -o output file) and the list of
    input fastq files, runs FastQC on all inputs inside a temporary directory,
    and condenses the generated summary.txt files into the output file.

    Exits through optparse's error() (status 2) on invalid arguments and
    through stop_err if FastQC fails. The temporary directory is removed on
    every exit path.
    '''
    usage = "Usage: %prog -e fastqc_executable -o output_file fastq_file [fastq_file ... ]"
    version = "%prog 1.0.0"
    op = optparse.OptionParser(usage=usage, version=version)
    op.add_option('-e', '--executable', dest="executable", help="location of the FastQC program")
    op.add_option('-o', '--output', dest="outfile", help="location of the output file")
    (options, infiles) = op.parse_args()

    # check if location of the FastQC program was provided
    if options.executable is None:
        op.error("Missing location of FastQC")

    # check if FastQC program exists at the provided location
    if not os.path.isfile(options.executable):
        op.error("Cannot find FastQC at %s" % options.executable)

    # check if any input files were provided
    # (optparse returns an empty list, never None, when there are none)
    if not infiles:
        op.error("Missing input files")

    # check if all input files exist
    for f in infiles:
        if not os.path.isfile(f):
            op.error("Cannot find input file %s" % f)

    # check if output file was provided
    if options.outfile is None:
        op.error("Missing output file name")

    tmpdir = tempfile.mkdtemp()  # temp dir for FastQC output
    try:
        # list is more secure than string for subprocess call
        cmd = [options.executable, '-o', tmpdir]
        cmd.extend(infiles)
        _run_fastqc(cmd, tmpdir)

        # parse all summary.txt files produced by FastQC and write results into the output file
        with open(options.outfile, 'w') as outfile:
            _write_report(outfile, tmpdir, infiles)
    finally:
        # best-effort clean up so we don't fail on stale nfs handles
        shutil.rmtree(tmpdir, ignore_errors=True)
| 108 | |
# Run the checker only when this file is executed as a script.
if __name__ == '__main__':
    __main__()
