0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 '''
|
|
4 FastQC checker for Galaxy biomedical data analysis platform
|
|
5
|
|
6 @author: Ilya Sytchev
|
|
7
|
|
8 Input: one or more files in fastq format
|
|
9 Output: sequencing quality report in text format
|
|
10
|
|
11 Requires FastQC 0.10.0 (http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/)
|
|
12
|
|
13 Partially based on:
|
|
14 fastqcwrapper (http://toolshed.g2.bx.psu.edu/repos/jjohnson/fastqc)
|
|
15 rgFastQC (https://bitbucket.org/galaxy/galaxy-dist/src/tip/tools/rgenetics/rgFastQC.py)
|
|
16
|
|
17 Tested with Python 2.6.1 and 2.7.2 on Mac OS 10.6.8
|
|
18 '''
|
|
19
|
|
20 import sys, os, optparse, tempfile, shutil, subprocess
|
|
21
|
|
def stop_err(msg, returncode=1):
    '''
    Report a fatal error and terminate the script.

    msg is written to standard error exactly as given (no newline is
    appended), then the interpreter exits with returncode as its status.
    '''
    sys.stderr.write(msg)
    # sys.exit() is just this raise, spelled out here
    raise SystemExit(returncode)
|
|
25
|
|
def _summary_path(tmpdir, infile):
    '''Return (dataset name, expected path of the FastQC summary.txt) for infile.'''
    filename = os.path.basename(infile)
    (datasetname, extension) = os.path.splitext(filename)
    # Need to account for FastQC removing .fastq extension from input file
    # names before using them to create output file and dir names; any other
    # file name is used whole.
    # Alternative solution is to iterate over report directories instead of
    # input file names.
    if extension == '.fastq':
        reportdir = datasetname + '_fastqc'
    else:
        reportdir = filename + '_fastqc'
    return (datasetname, os.path.join(tmpdir, reportdir, 'summary.txt'))


def _run_fastqc(cmd, tmpdir):
    '''Run cmd with stdout/stderr captured in tmpdir; exit via stop_err on failure.'''
    # mkstemp creates and opens each capture file in one step, so there is no
    # window in which the name exists without the file
    (err_fd, err_name) = tempfile.mkstemp(dir=tmpdir, suffix='.err')
    out_fd = tempfile.mkstemp(dir=tmpdir, suffix='.out')[0]
    failure = None
    try:
        subprocess.check_call(cmd, stderr=err_fd, stdout=out_fd)
    except subprocess.CalledProcessError as e:
        failure = ("Error executing FastQC\n", e.returncode)
    except OSError as e:
        # the program could not be started at all (e.g. not executable)
        failure = ("Error executing FastQC: %s\n" % e, 1)
    finally:
        os.close(err_fd)
        os.close(out_fd)

    if failure is None:
        return

    (msg, returncode) = failure
    # Relay whatever FastQC printed on stderr: the temp dir holding the
    # capture file is removed by the caller, so this is the only chance.
    try:
        with open(err_name, 'r') as err_file:
            msg += err_file.read()
    except (IOError, OSError, UnicodeError):
        pass  # diagnostics are best-effort; the primary message is still reported
    stop_err(msg, returncode)


def _write_report(outfile, infiles, tmpdir):
    '''Write the result and test columns of each summary.txt to outfile.'''
    for f in infiles:
        (datasetname, summaryfilename) = _summary_path(tmpdir, f)
        outfile.write("%s results:\n" % datasetname)
        # if summary file exists, process and add results to the output file
        if os.path.isfile(summaryfilename):
            with open(summaryfilename, 'r') as summaryfile:
                for line in summaryfile:
                    fields = line.rstrip('\r\n').split('\t')
                    if len(fields) < 2:
                        continue  # blank or malformed line: nothing to report
                    outfile.write(fields[0] + '\t' + fields[1] + '\n')
        else:
            outfile.write("FastQC summary report was not found at %s.\n" % summaryfilename)
        outfile.write("\n")


def __main__():
    '''
    Command-line entry point.

    Reads -e/--executable (location of the FastQC program), -o/--output
    (location of the report to write) and one or more input file names from
    sys.argv, runs FastQC on the inputs with a temporary output directory,
    and writes the first two columns of every summary.txt it produced to
    the output file.

    Invalid or missing arguments are reported through OptionParser.error(),
    which prints the usage message and exits with status 2. A FastQC failure
    exits through stop_err() with FastQC's return code. The temporary
    directory is removed in every case.
    '''
    usage = "Usage: %prog -e fastqc_executable -o output_file fastq_file [fastq_file ... ]"
    version = "%prog 1.0.0"
    op = optparse.OptionParser(usage=usage, version=version)
    op.add_option('-e', '--executable', dest="executable", help="location of the FastQC program")
    op.add_option('-o', '--output', dest="outfile", help="location of the output file")
    (options, infiles) = op.parse_args()

    # check if location of the FastQC program was provided
    if options.executable is None:
        op.error("Missing location of FastQC")

    # check if FastQC program exists at the provided location
    if not os.path.isfile(options.executable):
        op.error("Cannot find FastQC at %s" % options.executable)

    # check if any input files were provided
    # (parse_args returns a possibly empty list here, never None)
    if not infiles:
        op.error("Missing input files")

    # check if all input files exist
    for f in infiles:
        if not os.path.isfile(f):
            op.error("Cannot find input file %s" % f)

    # check if output file was provided
    if options.outfile is None:
        op.error("Missing output file name")

    tmpdir = tempfile.mkdtemp()  # create temp dir for FastQC output
    try:
        # assemble FastQC command line
        # list is more secure than string for subprocess call
        cmd = [options.executable, '-o', tmpdir]
        cmd.extend(infiles)

        _run_fastqc(cmd, tmpdir)

        # parse all summary.txt files produced by FastQC and write results
        # into the output file
        with open(options.outfile, 'w') as outfile:
            _write_report(outfile, infiles, tmpdir)
    finally:
        # clean up temp dir even when exiting on error; errors are ignored
        # so we don't fail on stale nfs handles
        shutil.rmtree(tmpdir, ignore_errors=True)
|
|
108
|
|
# run the checker only when executed as a script, not when imported
if __name__ == '__main__':
    __main__()
|